diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 28e13fee4..48e7191e4 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -10,6 +10,7 @@ use crate::dataflow::DataflowConstraints; use crate::equiv::GlobalEquivSet; use crate::equiv::LocalEquivSet; use crate::labeled_ty::LabeledTyCtxt; +use crate::last_use::{self, LastUse}; use crate::panic_detail; use crate::panic_detail::PanicDetail; use crate::pointee_type; @@ -536,6 +537,31 @@ fn get_skip_pointee_defs() -> io::Result> { Ok(skip_pointee) } +fn get_rewrite_mode(tcx: TyCtxt, pointwise_fn_ldid: Option) -> rewrite::UpdateFiles { + let mut update_files = rewrite::UpdateFiles::No; + if let Ok(val) = env::var("C2RUST_ANALYZE_REWRITE_MODE") { + match val.as_str() { + "none" => {} + "inplace" => { + update_files = rewrite::UpdateFiles::InPlace; + } + "alongside" => { + update_files = rewrite::UpdateFiles::Alongside; + } + "pointwise" => { + let pointwise_fn_ldid = pointwise_fn_ldid.expect( + "C2RUST_ANALYZE_REWRITE_MODE=pointwise, \ + but pointwise_fn_ldid is unset?", + ); + let pointwise_fn_name = tcx.item_name(pointwise_fn_ldid.to_def_id()); + update_files = rewrite::UpdateFiles::AlongsidePointwise(pointwise_fn_name); + } + _ => panic!("bad value {:?} for C2RUST_ANALYZE_REWRITE_MODE", val), + } + } + update_files +} + /// Local information, specific to a single function. Many of the data structures we use for /// the pointer analysis have a "global" part that's shared between all functions and a "local" /// part that's specific to the function being analyzed; this struct contains only the local @@ -558,6 +584,9 @@ struct FuncInfo<'tcx> { local_pointee_types: MaybeUnset>>, /// Table for looking up the most recent write to a given local. recent_writes: MaybeUnset, + /// Analysis result indicating which uses of each local are actually the last use of that + /// local. + last_use: MaybeUnset, } fn run(tcx: TyCtxt) { @@ -616,6 +645,7 @@ fn run(tcx: TyCtxt) { // ---------------------------------- do_recent_writes(&gacx, &mut func_info, &all_fn_ldids); + do_last_use(&gacx, &mut func_info, &all_fn_ldids); // ---------------------------------- // Remap `PointerId`s by equivalence class @@ -1096,6 +1126,17 @@ fn run(tcx: TyCtxt) { return; } + if env::var("C2RUST_ANALYZE_DEBUG_LAST_USE").is_ok() { + let mut ann = AnnotationBuffer::new(tcx); + debug_annotate_last_use(&gacx, &func_info, &all_fn_ldids, &mut ann); + let annotations = ann.finish(); + let update_files = get_rewrite_mode(tcx, None); + eprintln!("update mode = {:?}", update_files); + rewrite::apply_rewrites(tcx, Vec::new(), annotations, update_files); + eprintln!("finished writing last_use annotations - exiting"); + return; + } + if !rewrite_pointwise { run2( None, @@ -1239,6 +1280,7 @@ fn run2<'tcx>( &mut acx, &asn, pointee_types, + &info.last_use, ldid.to_def_id(), &mir, hir_body_id, @@ -1547,27 +1589,7 @@ fn run2<'tcx>( let annotations = ann.finish(); // Apply rewrite to all functions at once. - let mut update_files = rewrite::UpdateFiles::No; - if let Ok(val) = env::var("C2RUST_ANALYZE_REWRITE_MODE") { - match val.as_str() { - "none" => {} - "inplace" => { - update_files = rewrite::UpdateFiles::InPlace; - } - "alongside" => { - update_files = rewrite::UpdateFiles::Alongside; - } - "pointwise" => { - let pointwise_fn_ldid = pointwise_fn_ldid.expect( - "C2RUST_ANALYZE_REWRITE_MODE=pointwise, \ - but pointwise_fn_ldid is unset?", - ); - let pointwise_fn_name = tcx.item_name(pointwise_fn_ldid.to_def_id()); - update_files = rewrite::UpdateFiles::AlongsidePointwise(pointwise_fn_name); - } - _ => panic!("bad value {:?} for C2RUST_ANALYZE_REWRITE_MODE", val), - } - } + let update_files = get_rewrite_mode(tcx, pointwise_fn_ldid); rewrite::apply_rewrites(tcx, all_rewrites, annotations, update_files); // ---------------------------------- @@ -1856,6 +1878,69 @@ fn do_recent_writes<'tcx>( } } +fn do_last_use<'tcx>( + gacx: &GlobalAnalysisCtxt<'tcx>, + func_info: &mut HashMap>, + all_fn_ldids: &[LocalDefId], +) { + let tcx = gacx.tcx; + for &ldid in all_fn_ldids { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { + continue; + } + + let ldid_const = WithOptConstParam::unknown(ldid); + let info = func_info.get_mut(&ldid).unwrap(); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + + // This is very straightforward because it doesn't need an `AnalysisCtxt` and never fails. + info.last_use.set(last_use::calc_last_use(&mir)); + } +} + +fn debug_annotate_last_use<'tcx>( + gacx: &GlobalAnalysisCtxt<'tcx>, + func_info: &HashMap>, + all_fn_ldids: &[LocalDefId], + ann: &mut AnnotationBuffer, +) { + let tcx = gacx.tcx; + for &ldid in all_fn_ldids { + let ldid_const = WithOptConstParam::unknown(ldid); + let info = match func_info.get(&ldid) { + Some(x) => x, + None => continue, + }; + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + + if !info.last_use.is_set() { + continue; + } + let last_use = info.last_use.get(); + let mut last_use = last_use.iter().collect::>(); + last_use.sort(); + for (loc, which, local) in last_use { + let span = mir + .stmt_at(loc) + .either(|stmt| stmt.source_info.span, |term| term.source_info.span); + ann.emit( + span, + format!( + "{which:?}: last use of {} {local:?} ({})", + if mir.local_kind(local) == LocalKind::Temp { + "temporary" + } else { + "local" + }, + describe_local(tcx, &mir.local_decls[local]), + ), + ); + } + } +} + /// Run the `pointee_type` analysis, which tries to determine the actual type of data that each /// pointer can point to. This is particularly important for `void*` pointers, which are typically /// cast to a different type before use. diff --git a/c2rust-analyze/src/last_use.rs b/c2rust-analyze/src/last_use.rs new file mode 100644 index 000000000..23382419f --- /dev/null +++ b/c2rust-analyze/src/last_use.rs @@ -0,0 +1,380 @@ +//! Analysis to find the last use of a MIR local. For non-`Copy` types, the last use can be made +//! into a move instead of a clone or borrow. +//! +//! In terms of liveness analysis, a "last use" is a statement where a variable transitions from +//! live to dead as a result of its use in the statement. A variable can also become dead due to +//! control flow: in `if f(p) { g(p); }`, the variable `p` is still live after evaluating `f(p)`, +//! but becomes dead upon taking the `else` branch, which contains no more uses of `p`. In this +//! example, `g(p)` is a last use, but `f(p)` is not (and in the `else` case, no last use of `p` +//! will be encountered during execution). +//! +//! We specifically use this when handling borrows of non-`Copy` pointers like `Option<&mut T>`. +//! At most use sites, we produce `ptr.as_deref_mut().unwrap() ...`, which borrows `ptr` instead of +//! moving it. However, this construction produces a reference with a shorter lifetime, matching +//! the lifetime of the local `ptr`. When returning a borrow from a function, we instead want to +//! emit `ptr.unwrap() ...`, which consumes `ptr` but produces a reference with the same lifetime +//! as the reference in `ptr`. + +use std::collections::{HashMap, VecDeque}; + +use rustc_index::bit_set::BitSet; +use rustc_index::vec::IndexVec; +use rustc_middle::mir::traversal; +use rustc_middle::mir::{ + BasicBlock, BasicBlockData, Body, Local, Location, Operand, Place, Rvalue, Statement, + StatementKind, Terminator, TerminatorKind, +}; + +/// A single MIR `Statement` or `Terminator` may refer to multiple `Place`s in different +/// `Operand`s. This type identifies a particular `Place` of interest within the `Statement` +/// or`Terminator`. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] +pub enum WhichPlace { + Lvalue, + Operand(usize), +} + +#[derive(Clone, Debug, Default)] +pub struct LastUse { + /// `(location, which_place)` is present in this map if the indicated `Place` is the last use + /// of the value currently in that `Place`'s `Local`. + /// + /// Note that there may be more than one "last use" for a given local. If a variable `p` is + /// used, overwritten, and used again, then both uses may be marked as last uses: the first + /// value stored in `p` is used for the last time, then a new value is stored in `p`, and the + /// second value is also used (possibly for the last time). When there is branching control + /// flow, such as `if cond { f(p); } else { g(p); }`, then there may be a last use in each + /// branch. + /// + /// Also, there may be zero last uses for a given local. In `while f(p) { g(p); }`, the use + /// `g(p)` can't be a last use because `f(p)` will be called afterward, and `f(p)` can't be a + /// last use because `g(p)` might be called afterward. + /// + /// The `Local` value is used only for debugging; it isn't needed for generating rewrites. + set: HashMap<(Location, WhichPlace), Local>, +} + +impl LastUse { + pub fn is_last_use(&self, loc: Location, which: WhichPlace) -> bool { + self.set.contains_key(&(loc, which)) + } + + pub fn iter<'a>(&'a self) -> impl Iterator + 'a { + self.set + .iter() + .map(|(&(loc, which), &local)| (loc, which, local)) + } +} + +struct Action { + kind: ActionKind, + local: Local, + location: Location, + which_place: WhichPlace, +} + +enum ActionKind { + /// The local was written. This only counts direct writes that update the entire local, like + /// `_1 = ...`. Writes to fields or indirect writes through a reference don't count as `Def`s. + Def, + Use, +} + +struct ActionsBuilder { + actions: Vec, +} + +impl ActionsBuilder { + pub fn new() -> ActionsBuilder { + ActionsBuilder { + actions: Vec::new(), + } + } + + pub fn push_action(&mut self, act: Action) { + self.actions.push(act); + } + + pub fn finish(self) -> Vec { + self.actions + } + + pub fn push_place_use(&mut self, pl: Place, loc: Location, which: WhichPlace) { + self.push_action(Action { + kind: ActionKind::Use, + local: pl.local, + location: loc, + which_place: which, + }); + } + + pub fn push_lvalue_place(&mut self, pl: Place, loc: Location) { + if pl.projection.is_empty() { + self.push_action(Action { + kind: ActionKind::Def, + local: pl.local, + location: loc, + which_place: WhichPlace::Lvalue, + }); + } else { + self.push_place_use(pl, loc, WhichPlace::Lvalue); + } + } + + pub fn push_operand(&mut self, op: &Operand, loc: Location, which: WhichPlace) { + match *op { + Operand::Copy(pl) | Operand::Move(pl) => self.push_place_use(pl, loc, which), + Operand::Constant(_) => {} + } + } + + pub fn push_rvalue(&mut self, rv: &Rvalue, loc: Location) { + match *rv { + Rvalue::Use(ref op) => { + self.push_operand(op, loc, WhichPlace::Operand(0)); + } + Rvalue::Repeat(ref op, _) => { + self.push_operand(op, loc, WhichPlace::Operand(0)); + } + Rvalue::Ref(_rg, _kind, pl) => { + self.push_place_use(pl, loc, WhichPlace::Operand(0)); + } + Rvalue::ThreadLocalRef(_) => {} + Rvalue::AddressOf(_mutbl, pl) => { + self.push_place_use(pl, loc, WhichPlace::Operand(0)); + } + Rvalue::Len(pl) => { + self.push_place_use(pl, loc, WhichPlace::Operand(0)); + } + Rvalue::Cast(_kind, ref op, _ty) => { + self.push_operand(op, loc, WhichPlace::Operand(0)); + } + Rvalue::BinaryOp(_bin_op, ref x) | Rvalue::CheckedBinaryOp(_bin_op, ref x) => { + let (ref a, ref b) = **x; + self.push_operand(a, loc, WhichPlace::Operand(0)); + self.push_operand(b, loc, WhichPlace::Operand(1)); + } + Rvalue::NullaryOp(_null_op, _ty) => {} + Rvalue::UnaryOp(_un_op, ref op) => { + self.push_operand(op, loc, WhichPlace::Operand(0)); + } + Rvalue::Discriminant(pl) => { + self.push_place_use(pl, loc, WhichPlace::Operand(0)); + } + Rvalue::Aggregate(_, ref ops) => { + for (i, op) in ops.iter().enumerate() { + self.push_operand(op, loc, WhichPlace::Operand(i)); + } + } + Rvalue::ShallowInitBox(ref op, _ty) => { + self.push_operand(op, loc, WhichPlace::Operand(0)); + } + Rvalue::CopyForDeref(pl) => { + self.push_place_use(pl, loc, WhichPlace::Operand(0)); + } + } + } + + pub fn push_statement(&mut self, stmt: &Statement, loc: Location) { + match stmt.kind { + StatementKind::Assign(ref x) => { + let (pl, ref rv) = **x; + // The RHS is evaluated before the LHS. + self.push_rvalue(rv, loc); + self.push_lvalue_place(pl, loc); + } + StatementKind::FakeRead(..) => {} + StatementKind::SetDiscriminant { + ref place, + variant_index: _, + } => { + self.push_place_use(**place, loc, WhichPlace::Operand(0)); + } + StatementKind::Deinit(ref place) => { + self.push_place_use(**place, loc, WhichPlace::Operand(0)); + } + StatementKind::StorageLive(..) => {} + StatementKind::StorageDead(..) => {} + // `Retag` shouldn't appear: "These statements are currently only interpreted by miri + // and only generated when `-Z mir-emit-retag` is passed." + StatementKind::Retag(..) => panic!("unexpected StatementKind::Retag"), + StatementKind::AscribeUserType(..) => {} + StatementKind::Coverage(..) => {} + StatementKind::CopyNonOverlapping(ref cno) => { + self.push_operand(&cno.src, loc, WhichPlace::Operand(0)); + self.push_operand(&cno.dst, loc, WhichPlace::Operand(0)); + self.push_operand(&cno.count, loc, WhichPlace::Operand(0)); + } + StatementKind::Nop => {} + } + } + + pub fn push_terminator(&mut self, term: &Terminator, loc: Location) { + match term.kind { + TerminatorKind::Goto { .. } => {} + TerminatorKind::SwitchInt { ref discr, .. } => { + self.push_operand(discr, loc, WhichPlace::Operand(0)); + } + TerminatorKind::Resume => {} + TerminatorKind::Abort => {} + TerminatorKind::Return => {} + TerminatorKind::Unreachable => {} + // We ignore automatically-inserted `Drop`s, since the `Drop` may be eliminated if a + // previous use is converted to a move. + TerminatorKind::Drop { .. } => {} + // `DropAndReplace` is used for some assignments. + TerminatorKind::DropAndReplace { + place, ref value, .. + } => { + self.push_operand(value, loc, WhichPlace::Operand(0)); + self.push_lvalue_place(place, loc); + } + TerminatorKind::Call { + ref func, ref args, .. + } => { + self.push_operand(func, loc, WhichPlace::Operand(0)); + for (i, op) in args.iter().enumerate() { + self.push_operand(op, loc, WhichPlace::Operand(i + 1)); + } + } + // TODO: Handle `Assert`. For now We ignore it because it isn't generated for any + // pointer-related operations, and we don't currently care about `LastUse` results for + // non-pointers. + TerminatorKind::Assert { .. } => {} + TerminatorKind::Yield { .. } => panic!("unsupported TerminatorKind::Yield"), + TerminatorKind::GeneratorDrop => {} + TerminatorKind::FalseEdge { .. } => {} + TerminatorKind::FalseUnwind { .. } => {} + TerminatorKind::InlineAsm { .. } => panic!("unsupported TerminatorKind::InlineAsm"), + } + } +} + +fn calc_block_actions(bb: BasicBlock, bb_data: &BasicBlockData) -> Vec { + let mut builder = ActionsBuilder::new(); + + for (i, stmt) in bb_data.statements.iter().enumerate() { + builder.push_statement( + stmt, + Location { + block: bb, + statement_index: i, + }, + ); + } + + if let Some(ref term) = bb_data.terminator { + builder.push_terminator( + term, + Location { + block: bb, + statement_index: bb_data.statements.len(), + }, + ); + } + + builder.finish() +} + +pub fn calc_last_use(mir: &Body) -> LastUse { + // Build a list of relevant actions for each block. An action is an access (`Use`) or a write + // (`Def`) of a local variable. + let block_actions: IndexVec> = mir + .basic_blocks + .iter_enumerated() + .map(|(bb, data)| calc_block_actions(bb, data)) + .collect(); + + // Compute liveness on entry for each block. + let mut entry: IndexVec> = IndexVec::from_elem_n( + BitSet::new_empty(mir.local_decls.len()), + mir.basic_blocks.len(), + ); + let mut dirty: BitSet = BitSet::new_empty(mir.basic_blocks.len()); + let mut queue = VecDeque::new(); + // In some cases, `postorder` doesn't return all the blocks. We add any leftover blocks + // afterward. + for (bb, _) in traversal::postorder(mir).chain(mir.basic_blocks.iter_enumerated()) { + if dirty.insert(bb) { + queue.push_back(bb); + } + } + eprintln!("queue = {queue:?}"); + + while let Some(bb) = queue.pop_front() { + let mut live = BitSet::new_empty(mir.local_decls.len()); + // Start at the end of the block and work backwards. A local is live on exit from the + // block if it's live in at least one successor. + for succ in mir.basic_blocks[bb].terminator().successors() { + live.union(&entry[succ]); + } + // Apply the effects of the block. + for act in block_actions[bb].iter().rev() { + // A variable is live if it may be read before it is written. + match act.kind { + ActionKind::Use => { + // This is a read. The variable is definitely live prior to this. + live.insert(act.local); + } + ActionKind::Def => { + // This is a write. The variable is definitely not live prior to this. + live.remove(act.local); + } + } + } + // If the live variables on entry changed for this block, recompute its predecessors. + let changed = live != entry[bb]; + eprintln!("update {bb:?}: {:?} -> {:?}", entry[bb], live); + entry[bb] = live; + dirty.remove(bb); + if changed { + for &pred in &mir.basic_blocks.predecessors()[bb] { + if dirty.insert(pred) { + eprintln!(" enqueue {pred:?}"); + queue.push_back(pred); + } + } + } + } + + debug_assert!(dirty.is_empty()); + + // Make a final pass over each block to find the `LastUse` of each variable. Any time we see + // an `ActionKind::Use` where the local is live before but dead after, we record that action as + // a "last use". + let mut last_use = LastUse { + set: HashMap::new(), + }; + for (bb, actions) in block_actions.iter_enumerated() { + let mut live = BitSet::new_empty(mir.local_decls.len()); + for succ in mir.basic_blocks[bb].terminator().successors() { + live.union(&entry[succ]); + } + + for act in actions.iter().rev() { + match act.kind { + ActionKind::Use => { + if live.insert(act.local) { + // We're processing the actions in reverse order, and this local was dead + // (not present in the `live` set) before this `insert` call, and now is + // live. This means during forward execution of the block, the local is + // live before this action and dead afterward. + last_use + .set + .insert((act.location, act.which_place), act.local); + } + } + ActionKind::Def => { + live.remove(act.local); + } + } + } + + debug_assert_eq!( + live, entry[bb], + "calc_last_use failed to reach fixpoint for {bb:?}" + ); + } + + last_use +} diff --git a/c2rust-analyze/src/main.rs b/c2rust-analyze/src/main.rs index f882f1652..219fbafbe 100644 --- a/c2rust-analyze/src/main.rs +++ b/c2rust-analyze/src/main.rs @@ -23,6 +23,7 @@ mod dataflow; mod equiv; mod known_fn; mod labeled_ty; +mod last_use; mod log; mod panic_detail; mod pointee_type; diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 8a730c02a..b2862ff3a 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -881,20 +881,34 @@ pub fn convert_cast_rewrite(kind: &mir_op::RewriteKind, hir_rw: Rewrite) -> Rewr ) } - mir_op::RewriteKind::OptionDowngrade { mutbl, deref } => { + mir_op::RewriteKind::OptionDowngrade { mutbl, kind } => { // `p` -> `Some(p)` - let ref_method = if deref { - if mutbl { - "as_deref_mut".into() - } else { - "as_deref".into() + match kind { + mir_op::OptionDowngradeKind::Borrow => { + let ref_method = if mutbl { + "as_mut".into() + } else { + "as_ref".into() + }; + Rewrite::MethodCall(ref_method, Box::new(hir_rw), vec![]) } - } else if mutbl { - "as_mut".into() - } else { - "as_ref".into() - }; - Rewrite::MethodCall(ref_method, Box::new(hir_rw), vec![]) + mir_op::OptionDowngradeKind::Deref => { + let ref_method = if mutbl { + "as_deref_mut".into() + } else { + "as_deref".into() + }; + Rewrite::MethodCall(ref_method, Box::new(hir_rw), vec![]) + } + mir_op::OptionDowngradeKind::MoveAndDeref => { + let closure = if mutbl { + format_rewrite!("|ptr| &mut *ptr") + } else { + format_rewrite!("|ptr| &*ptr") + }; + Rewrite::MethodCall("map".into(), Box::new(hir_rw), vec![closure]) + } + } } mir_op::RewriteKind::DynOwnedUnwrap => { diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index d579f698b..f31835d25 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -8,6 +8,7 @@ //! materialize adjustments only on code that's subject to some rewrite. use crate::context::{AnalysisCtxt, Assignment, DontRewriteFnReason, FlagSet, LTy, PermissionSet}; +use crate::last_use::{self, LastUse}; use crate::panic_detail; use crate::pointee_type::PointeeTypes; use crate::pointer_id::{GlobalPointerTable, PointerId, PointerTable}; @@ -138,7 +139,10 @@ pub enum RewriteKind { OptionMapEnd, /// Downgrade ownership of an `Option` to `Option<&_>` or `Option<&mut _>` by calling /// `as_ref()`/`as_mut()` or `as_deref()`/`as_deref_mut()`. - OptionDowngrade { mutbl: bool, deref: bool }, + OptionDowngrade { + mutbl: bool, + kind: OptionDowngradeKind, + }, /// Extract the `T` from `DynOwned`. DynOwnedUnwrap, @@ -173,6 +177,16 @@ pub enum RewriteKind { AsPtr, } +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum OptionDowngradeKind { + /// E.g. `Option` to `Option<&mut T>` via `p.as_mut()` + Borrow, + /// E.g. `Option>` to `Option<&mut T>` via `p.as_deref_mut()` + Deref, + /// E.g. `Option<&mut T>` to `Option<&T>` via `p.map(|ptr| &*ptr)` + MoveAndDeref, +} + #[derive(Clone, PartialEq, Eq, Debug)] pub enum ZeroizeType { /// Zeroize by storing the literal `0`. @@ -240,6 +254,7 @@ struct ExprRewriteVisitor<'a, 'tcx> { perms: &'a GlobalPointerTable, flags: &'a GlobalPointerTable, pointee_types: PointerTable<'a, PointeeTypes<'tcx>>, + last_use: &'a LastUse, rewrites: &'a mut HashMap>, mir: &'a Body<'tcx>, loc: Location, @@ -252,6 +267,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { acx: &'a AnalysisCtxt<'a, 'tcx>, asn: &'a Assignment, pointee_types: PointerTable<'a, PointeeTypes<'tcx>>, + last_use: &'a LastUse, rewrites: &'a mut HashMap>, mir: &'a Body<'tcx>, ) -> ExprRewriteVisitor<'a, 'tcx> { @@ -262,6 +278,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { perms, flags, pointee_types, + last_use, rewrites, mir, loc: Location { @@ -363,6 +380,12 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { desc.dyn_owned } + /// Check whether the `Place` identified by `which` within the current statement or terminator + /// is the last use of a local. + fn is_last_use(&self, which: last_use::WhichPlace) -> bool { + self.last_use.is_last_use(self.loc, which) + } + fn rewrite_lty(&self, lty: LTy<'tcx>) -> Ty<'tcx> { let tcx = self.acx.tcx(); rewrite::ty::rewrite_lty( @@ -383,6 +406,47 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { (rewritten_ty, s) } + fn current_sub_loc_as_which_place(&self) -> Option { + // This logic is a bit imprecise, but should be correct in the cases where we actually call + // it (namely, when the `Place`/`Rvalue` is simply a `Local`). + let mut which = None; + for sl in &self.sub_loc { + match *sl { + SubLoc::Dest => { + which = Some(last_use::WhichPlace::Lvalue); + } + SubLoc::Rvalue => { + which = Some(last_use::WhichPlace::Operand(0)); + // Note there may be more entries in `self.sub_loc` giving a more precise + // position, as in `[SubLoc::Rvalue, SubLoc::CallArg(0)]`. We keep looping + // over `self.sub_loc` and take the last (most precise) entry. + } + SubLoc::CallArg(i) => { + // In a call, `WhichPlace::Operand(0)` refers to the callee function. + which = Some(last_use::WhichPlace::Operand(i + 1)); + } + SubLoc::RvalueOperand(i) => { + which = Some(last_use::WhichPlace::Operand(i)); + } + SubLoc::RvaluePlace(_) => {} + SubLoc::OperandPlace => {} + SubLoc::PlaceDerefPointer => {} + SubLoc::PlaceFieldBase => {} + SubLoc::PlaceIndexArray => {} + } + } + which + } + + /// Like `is_last_use`, but infers `WhichPlace` based on `self.sub_loc`. + fn current_sub_loc_is_last_use(&self) -> bool { + if let Some(which) = self.current_sub_loc_as_which_place() { + self.is_last_use(which) + } else { + false + } + } + fn visit_statement(&mut self, stmt: &Statement<'tcx>, loc: Location) { let _g = panic_detail::set_current_span(stmt.source_info.span); debug!( @@ -466,6 +530,13 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { // place, visit the RHS place mutably and `mem::take` out of it to avoid a // static ownership transfer. + // Determine whether it's okay for the cast emitted below to move the + // expression that it's casting. This is the case if the rvalue is not a + // place, which means it produces a temporary value that can be moved as + // needed, or if the rvalue is a place but is the last use of a local. + let cast_can_move = + !rv.is_place() || (rv.is_local() && v.current_sub_loc_is_last_use()); + // Check whether this assignment transfers ownership of its RHS. If so, return // the RHS `Place`. let assignment_transfers_ownership = || { @@ -498,7 +569,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { if v.is_nullable(rv_lty.label) { v.emit(RewriteKind::OptionDowngrade { mutbl: true, - deref: false, + kind: OptionDowngradeKind::Borrow, }); v.emit(RewriteKind::OptionMapBegin); v.emit(RewriteKind::Deref); @@ -508,13 +579,13 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { if v.is_nullable(rv_lty.label) { v.emit(RewriteKind::OptionMapEnd); } - v.emit_cast_lty_lty(rv_lty, pl_lty); + v.emit_cast_lty_lty(rv_lty, pl_lty, cast_can_move); return; } // Normal case: just `visit_rvalue` and emit a cast if needed. v.visit_rvalue(rv, Some(rv_lty)); - v.emit_cast_lty_lty(rv_lty, pl_lty) + v.emit_cast_lty_lty_or_borrow(rv_lty, pl_lty, cast_can_move) }); self.enter_dest(|v| v.visit_place(pl, PlaceAccess::Mut, RequireSinglePointer::Yes)); } @@ -583,7 +654,9 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { } if !pl_ty.label.is_none() { - v.emit_cast_lty_lty(lsig.output, pl_ty); + // Emit a cast. The call returns a temporary, which the cast + // is always allowed to move. + v.emit_cast_lty_lty(lsig.output, pl_ty, true); } }); } @@ -632,7 +705,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { // TODO: The result of `MemcpySafe` is always a slice, so this cast // may be using an incorrect input type. See the comment on the // `MemcpySafe` case of `rewrite::expr::convert` for details. - v.emit_cast_lty_lty(dest_lty, pl_ty); + v.emit_cast_lty_lty(dest_lty, pl_ty, true); } }); } @@ -672,7 +745,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { && v.perms[pl_ty.label].intersects(PermissionSet::USED) { let dest_lty = v.acx.type_of(&args[0]); - v.emit_cast_lty_lty(dest_lty, pl_ty); + v.emit_cast_lty_lty(dest_lty, pl_ty, true); } }); } @@ -1046,15 +1119,22 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { fn visit_operand(&mut self, op: &Operand<'tcx>, expect_ty: Option>) { match *op { Operand::Copy(pl) | Operand::Move(pl) => { - // TODO: should this be Move, Imm, or dependent on the type? - self.enter_operand_place(|v| { - v.visit_place(pl, PlaceAccess::Move, RequireSinglePointer::Yes) - }); + let pl_lty = self.acx.type_of(pl); + // Moving out of a `DynOwned` pointer requires `Mut` access rather than `Move`. + // TODO: this should probably check `desc.dyn_owned` rather than perms directly + let access = if !pl_lty.label.is_none() + && self.perms[pl_lty.label].contains(PermissionSet::FREE) + { + PlaceAccess::Mut + } else { + PlaceAccess::Move + }; + self.enter_operand_place(|v| v.visit_place(pl, access, RequireSinglePointer::Yes)); if let Some(expect_ty) = expect_ty { - let ptr_lty = self.acx.type_of(pl); - if !ptr_lty.label.is_none() { - self.emit_cast_lty_lty(ptr_lty, expect_ty); + if !pl_lty.label.is_none() { + let cast_can_move = pl.is_local() && self.current_sub_loc_is_last_use(); + self.emit_cast_lty_lty(pl_lty, expect_ty, cast_can_move); } } } @@ -1066,12 +1146,20 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { fn visit_operand_desc(&mut self, op: &Operand<'tcx>, expect_desc: TypeDesc<'tcx>) { match *op { Operand::Copy(pl) | Operand::Move(pl) => { - // TODO: should this be Move, Imm, or dependent on the type? - self.visit_place(pl, PlaceAccess::Move, RequireSinglePointer::Yes); + let pl_lty = self.acx.type_of(pl); + // Moving out of a `DynOwned` pointer requires `Mut` access rather than `Move`. + // TODO: this should probably check `desc.dyn_owned` rather than perms directly + let access = if !pl_lty.label.is_none() + && self.perms[pl_lty.label].contains(PermissionSet::FREE) + { + PlaceAccess::Mut + } else { + PlaceAccess::Move + }; + self.enter_operand_place(|v| v.visit_place(pl, access, RequireSinglePointer::Yes)); - let ptr_lty = self.acx.type_of(pl); - if !ptr_lty.label.is_none() { - self.emit_cast_lty_desc(ptr_lty, expect_desc); + if !pl_lty.label.is_none() { + self.emit_cast_lty_desc(pl_lty, expect_desc); } } Operand::Constant(..) => {} @@ -1122,6 +1210,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { }; match last_proj { PlaceElem::Deref => { + let cast_can_move = base_pl.is_local() && self.current_sub_loc_is_last_use(); self.enter_place_deref_pointer(|v| { v.visit_place_ref(base_pl, proj_ltys, access, RequireSinglePointer::Yes); if !v.flags[base_lty.label].contains(FlagSet::FIXED) { @@ -1140,7 +1229,13 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { if !desc.own.is_copy() { v.emit(RewriteKind::OptionDowngrade { mutbl: access == PlaceAccess::Mut, - deref: !desc.dyn_owned, + kind: if desc.dyn_owned { + OptionDowngradeKind::Borrow + } else if cast_can_move { + OptionDowngradeKind::MoveAndDeref + } else { + OptionDowngradeKind::Deref + }, }); } v.emit(RewriteKind::OptionUnwrap); @@ -1149,6 +1244,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { } } if desc.dyn_owned { + // TODO: do we need a `MoveAndDeref` equivalent for `DynOwned`? v.emit(RewriteKind::DynOwnedDowngrade { mutbl: access == PlaceAccess::Mut, }); @@ -1272,10 +1368,28 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { builder.build_cast_desc_lty(from, to_lty); } - fn emit_cast_lty_lty(&mut self, from_lty: LTy<'tcx>, to_lty: LTy<'tcx>) { + /// Emit a cast from `from_lty` to `to_lty` at the current `(loc, sub_loc)`. `is_local` should + /// be set when the thing being cast is a plain local with no projections; in that case, we may + /// apply special handling if this is the last use of the local. + fn emit_cast_lty_lty(&mut self, from_lty: LTy<'tcx>, to_lty: LTy<'tcx>, cast_can_move: bool) { let perms = self.perms; let flags = self.flags; - let mut builder = CastBuilder::new(self.acx.tcx(), perms, flags, |rk| self.emit(rk)); + let mut builder = CastBuilder::new(self.acx.tcx(), perms, flags, |rk| self.emit(rk)) + .can_move(cast_can_move); + builder.build_cast_lty_lty(from_lty, to_lty); + } + + fn emit_cast_lty_lty_or_borrow( + &mut self, + from_lty: LTy<'tcx>, + to_lty: LTy<'tcx>, + cast_can_move: bool, + ) { + let perms = self.perms; + let flags = self.flags; + let mut builder = CastBuilder::new(self.acx.tcx(), perms, flags, |rk| self.emit(rk)) + .can_move(cast_can_move) + .borrow(true); builder.build_cast_lty_lty(from_lty, to_lty); } @@ -1404,6 +1518,14 @@ pub struct CastBuilder<'a, 'tcx, PT1, PT2, F> { perms: &'a PT1, flags: &'a PT2, emit: F, + /// If set, the cast builder is allowed to move the expression being cast. For example, we + /// might emit `ptr.map(|p| &mut *p).unwrap()` instead of `ptr.as_deref_mut().unwrap()` when + /// `can_move` is set; the former moves out of `ptr` but avoids an additional borrow, making it + /// suitable to be used as the result expression of a function. + can_move: bool, + /// If set, the cast builder will emit a downgrade/borrow operation even for no-op casts, if + /// the thing being cast can't be moved (`!can_move`) and also can't be copied. + borrow: bool, } impl<'a, 'tcx, PT1, PT2, F> CastBuilder<'a, 'tcx, PT1, PT2, F> @@ -1423,9 +1545,21 @@ where perms, flags, emit, + can_move: false, + borrow: false, } } + pub fn can_move(mut self, can_move: bool) -> Self { + self.can_move = can_move; + self + } + + pub fn borrow(mut self, borrow: bool) -> Self { + self.borrow = borrow; + self + } + pub fn build_cast_desc_desc(&mut self, from: TypeDesc<'tcx>, to: TypeDesc<'tcx>) { self.try_build_cast_desc_desc(from, to).unwrap() } @@ -1457,6 +1591,39 @@ where from.pointee_ty = to.pointee_ty; if from == to { + // We normally do nothing if the `from` and `to` types are the same. However, in some + // cases we need to introduce a downgrade to avoid moving the operand inappropriately. + let can_reborrow = !from.option && !from.dyn_owned; + let need_downgrade = !self.can_move && !from.own.is_copy() && !can_reborrow; + if self.borrow && need_downgrade { + let mutbl = match from.own { + Ownership::Raw | Ownership::Imm | Ownership::Cell => false, + Ownership::RawMut | Ownership::Mut => true, + // Can't downgrade in these cases. + Ownership::Rc | Ownership::Box => return Ok(()), + }; + if from.option { + (self.emit)(RewriteKind::OptionDowngrade { + mutbl, + kind: if from.dyn_owned { + OptionDowngradeKind::Borrow + } else { + OptionDowngradeKind::Deref + }, + }); + } + if from.dyn_owned { + if from.option { + (self.emit)(RewriteKind::OptionMapBegin); + (self.emit)(RewriteKind::Deref); + } + (self.emit)(RewriteKind::DynOwnedDowngrade { mutbl }); + if from.option { + (self.emit)(RewriteKind::OptionMapEnd); + } + } + } + return Ok(()); } @@ -1478,7 +1645,13 @@ where Ownership::Raw | Ownership::Imm => { (self.emit)(RewriteKind::OptionDowngrade { mutbl: false, - deref: !from.dyn_owned, + kind: if from.dyn_owned { + OptionDowngradeKind::Borrow + } else if self.can_move { + OptionDowngradeKind::MoveAndDeref + } else { + OptionDowngradeKind::Deref + }, }); deref_after_unwrap = from.dyn_owned; from.own = Ownership::Imm; @@ -1486,7 +1659,13 @@ where Ownership::RawMut | Ownership::Cell | Ownership::Mut => { (self.emit)(RewriteKind::OptionDowngrade { mutbl: true, - deref: !from.dyn_owned, + kind: if from.dyn_owned { + OptionDowngradeKind::Borrow + } else if self.can_move { + OptionDowngradeKind::MoveAndDeref + } else { + OptionDowngradeKind::Deref + }, }); deref_after_unwrap = from.dyn_owned; from.own = Ownership::Mut; @@ -1849,15 +2028,66 @@ where } } +trait IsLocal { + fn is_local(&self) -> bool; +} + +impl IsLocal for Place<'_> { + fn is_local(&self) -> bool { + self.projection.is_empty() + } +} + +impl IsLocal for PlaceRef<'_> { + fn is_local(&self) -> bool { + self.projection.is_empty() + } +} + +impl IsLocal for Operand<'_> { + fn is_local(&self) -> bool { + self.place().map_or(false, |pl| pl.is_local()) + } +} + +impl IsLocal for Rvalue<'_> { + fn is_local(&self) -> bool { + match *self { + Rvalue::Use(ref op) => op.is_local(), + _ => false, + } + } +} + +trait IsPlace { + fn is_place(&self) -> bool; +} + +impl IsPlace for Operand<'_> { + fn is_place(&self) -> bool { + self.place().is_some() + } +} + +impl IsPlace for Rvalue<'_> { + fn is_place(&self) -> bool { + match *self { + Rvalue::Use(ref op) => op.is_place(), + _ => false, + } + } +} + pub fn gen_mir_rewrites<'tcx>( acx: &AnalysisCtxt<'_, 'tcx>, asn: &Assignment, pointee_types: PointerTable>, + last_use: &LastUse, mir: &Body<'tcx>, ) -> (HashMap>, DontRewriteFnReason) { let mut out = HashMap::new(); - let mut v = ExprRewriteVisitor::new(acx, asn, pointee_types, &mut out, mir); + let mut v = ExprRewriteVisitor::new(acx, asn, pointee_types, last_use, &mut out, mir); for (bb_id, bb) in mir.basic_blocks().iter_enumerated() { for (i, stmt) in bb.statements.iter().enumerate() { diff --git a/c2rust-analyze/src/rewrite/expr/mod.rs b/c2rust-analyze/src/rewrite/expr/mod.rs index a4de66714..37bea6fdb 100644 --- a/c2rust-analyze/src/rewrite/expr/mod.rs +++ b/c2rust-analyze/src/rewrite/expr/mod.rs @@ -1,6 +1,7 @@ use self::mir_op::MirRewrite; use self::unlower::{PreciseLoc, UnlowerMap}; use crate::context::{AnalysisCtxt, Assignment}; +use crate::last_use::LastUse; use crate::pointee_type::PointeeTypes; use crate::pointer_id::PointerTable; use crate::rewrite::Rewrite; @@ -25,11 +26,12 @@ pub fn gen_expr_rewrites<'tcx>( acx: &mut AnalysisCtxt<'_, 'tcx>, asn: &Assignment, pointee_types: PointerTable>, + last_use: &LastUse, def_id: DefId, mir: &Body<'tcx>, hir_body_id: BodyId, ) -> Vec<(Span, Rewrite)> { - let (mir_rewrites, errors) = mir_op::gen_mir_rewrites(acx, asn, pointee_types, mir); + let (mir_rewrites, errors) = mir_op::gen_mir_rewrites(acx, asn, pointee_types, last_use, mir); if !errors.is_empty() { acx.gacx.dont_rewrite_fns.add(def_id, errors); } diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index 380c421bb..045a7bf71 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -42,9 +42,15 @@ unsafe fn use_mut(mut p: *mut i32) -> i32 { // CHECK: *(p).as_deref_mut().unwrap() = 1; *p = 1; } + // The first use of `p` must borrow; the second and final use can move. // CHECK: use_const // CHECK-SAME: (p).as_deref() - use_const(p) + let x = use_const(p); + // CHECK: use_const + // CHECK-SAME: (p).map(|ptr| &*ptr) + // CHECK-NOT: as_deref + let y = use_const(p); + x + y } // CHECK-LABEL: unsafe fn use_const{{[<(]}}