diff --git a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs index 0a92e07f4ab4..9f700192f2f2 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs @@ -87,6 +87,13 @@ impl GotocCodegenBackend { check_contract: Option, mut transformer: BodyTransformation, ) -> (GotocCtx<'tcx>, Vec, Option) { + // This runs reachability analysis before global passes are applied. + // + // Alternatively, we could run reachability only once after the global passes are applied + // and resolve the necessary dependencies inside the passes on the fly. This, however, has a + // disadvantage of not having a precomputed call graph for the global passes to use. The + // call graph could be used, for example, in resolving function pointer or vtable calls for + // global passes that need this. let (items, call_graph) = with_timer( || collect_reachable_items(tcx, &mut transformer, starting_items), "codegen reachability analysis", @@ -115,6 +122,13 @@ impl GotocCodegenBackend { call_graph, ); + // Re-collect reachable items after global transformations were applied. This is necessary + // since global pass could add extra calls to instrumentation. + let (items, _) = with_timer( + || collect_reachable_items(tcx, &mut transformer, starting_items), + "codegen reachability analysis (second pass)", + ); + // Follow rustc naming convention (cx is abbrev for context). // https://rustc-dev-guide.rust-lang.org/conventions.html#naming-conventions let mut gcx = @@ -260,8 +274,8 @@ impl CodegenBackend for GotocCodegenBackend { for unit in units.iter() { // We reset the body cache for now because each codegen unit has different // configurations that affect how we transform the instance body. - let mut transformer = BodyTransformation::new(&queries, tcx, &unit); for harness in &unit.harnesses { + let transformer = BodyTransformation::new(&queries, tcx, &unit); let model_path = units.harness_model_path(*harness).unwrap(); let contract_metadata = contract_metadata_for_harness(tcx, harness.def.def_id()); @@ -273,7 +287,7 @@ impl CodegenBackend for GotocCodegenBackend { contract_metadata, transformer, ); - transformer = results.extend(gcx, items, None); + results.extend(gcx, items, None); if let Some(assigns_contract) = contract_info { modifies_instances.push((*harness, assigns_contract)); } diff --git a/kani-compiler/src/kani_middle/mod.rs b/kani-compiler/src/kani_middle/mod.rs index a7a512c86de3..a5d077d9c16e 100644 --- a/kani-compiler/src/kani_middle/mod.rs +++ b/kani-compiler/src/kani_middle/mod.rs @@ -32,6 +32,7 @@ pub mod codegen_units; pub mod coercion; mod intrinsics; pub mod metadata; +pub mod points_to; pub mod provide; pub mod reachability; pub mod resolve; diff --git a/kani-compiler/src/kani_middle/points_to/mod.rs b/kani-compiler/src/kani_middle/points_to/mod.rs new file mode 100644 index 000000000000..21e8bdffc7b8 --- /dev/null +++ b/kani-compiler/src/kani_middle/points_to/mod.rs @@ -0,0 +1,11 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! This module contains points-to analysis primitives, such as the graph and types representing its +//! nodes, and the analysis itself. 
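+//!
+//! In short: `run_points_to_analysis` walks a MIR body (and every body reachable from it)
+//! and returns a `PointsToGraph`, whose nodes are memory locations (`MemLoc`) and whose
+//! directed edges over-approximate "may point to" relations.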
+ +mod points_to_analysis; +mod points_to_graph; + +pub use points_to_analysis::run_points_to_analysis; +pub use points_to_graph::{MemLoc, PointsToGraph}; diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs new file mode 100644 index 000000000000..640318ccb584 --- /dev/null +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -0,0 +1,654 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Implementation of the points-to analysis using Rust's native dataflow framework. This provides +//! necessary aliasing information for instrumenting delayed UB later on. +//! +//! The analysis uses Rust's dataflow framework by implementing appropriate traits to leverage the +//! existing fixpoint solver infrastructure. The main trait responsible for the dataflow analysis +//! behavior is `rustc_mir_dataflow::Analysis`: it provides two methods that are responsible for +//! handling statements and terminators, which we implement. +//! +//! The analysis proceeds by looking at each instruction in the dataflow order and collecting all +//! possible aliasing relations that the instruction introduces. If a terminator is a function call, +//! the analysis recurs into the function and then joins the information retrieved from it into the +//! original graph. +//! +//! For each instruction, the analysis first resolves dereference projections for each place to +//! determine which places it could point to. This is done by finding a set of successors in the +//! graph for each dereference projection. +//! +//! Then, the analysis adds the appropriate edges into the points-to graph. It proceeds until there +//! is no new information to be discovered. +//! +//! Currently, the analysis is not field-sensitive: e.g., if a field of a place aliases to some +//! other place, we treat it as if the place itself aliases to another place. + +use crate::kani_middle::{ + points_to::{MemLoc, PointsToGraph}, + reachability::CallGraph, + transform::RustcInternalMir, +}; +use rustc_ast::Mutability; +use rustc_middle::{ + mir::{ + BasicBlock, BinOp, Body, CallReturnPlaces, Location, NonDivergingIntrinsic, Operand, Place, + ProjectionElem, Rvalue, Statement, StatementKind, Terminator, TerminatorEdges, + TerminatorKind, + }, + ty::{Instance, InstanceKind, List, ParamEnv, TyCtxt, TyKind}, +}; +use rustc_mir_dataflow::{Analysis, AnalysisDomain, Forward, JoinSemiLattice}; +use rustc_smir::rustc_internal; +use rustc_span::{source_map::Spanned, DUMMY_SP}; +use stable_mir::mir::{mono::Instance as StableInstance, Body as StableBody}; +use std::collections::HashSet; + +/// Main points-to analysis object. +struct PointsToAnalysis<'a, 'tcx> { + instance: Instance<'tcx>, + body: &'a Body<'tcx>, + tcx: TyCtxt<'tcx>, + /// This will be used in the future to resolve function pointer and vtable calls. Currently, we + /// can resolve call graph edges just by looking at the terminators and erroring if we can't + /// resolve the callee. + call_graph: &'a CallGraph, + /// This graph should contain a subset of the points-to graph reachable from function arguments. + /// For the entry function it will be empty (as it supposedly does not have any parameters). + initial_graph: PointsToGraph<'tcx>, +} + +/// Public points-to analysis entry point. Performs the analysis on a body, outputting the graph +/// containing aliasing information of the body itself and any body reachable from it. 
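+///
+/// A sketch of a call site (hedged illustration only; assumes `body`, `tcx`, `instance`, and
+/// `call_graph` are already in scope, as they are in `DelayedUbPass`):
+///
+/// ```ignore
+/// let graph = run_points_to_analysis(&body, tcx, instance, call_graph);
+/// ```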
+pub fn run_points_to_analysis<'tcx>( + body: &StableBody, + tcx: TyCtxt<'tcx>, + instance: StableInstance, + call_graph: &CallGraph, +) -> PointsToGraph<'tcx> { + // Dataflow analysis does not yet work with StableMIR, so need to perform backward + // conversion. + let internal_instance = rustc_internal::internal(tcx, instance); + let internal_body = body.internal_mir(tcx); + PointsToAnalysis::run( + &internal_body, + tcx, + internal_instance, + call_graph, + PointsToGraph::empty(), + ) +} + +impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { + /// Perform the analysis on a body, outputting the graph containing aliasing information of the + /// body itself and any body reachable from it. + pub fn run( + body: &'a Body<'tcx>, + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + call_graph: &'a CallGraph, + initial_graph: PointsToGraph<'tcx>, + ) -> PointsToGraph<'tcx> { + let analysis = Self { body, tcx, instance, call_graph, initial_graph }; + // This creates a fixpoint solver using the initial graph, the body, and extra information + // and solves the dataflow problem, producing the cursor, which contains dataflow state for + // each instruction in the body. + let mut cursor = + analysis.into_engine(tcx, body).iterate_to_fixpoint().into_results_cursor(body); + // We collect dataflow state at each `Return` terminator to determine the full aliasing + // graph for the function. This is sound since those are the only places where the function + // finishes, so the dataflow state at those places will be a union of dataflow states + // preceding to it, which means every possible execution is taken into account. + let mut results = PointsToGraph::empty(); + for (idx, bb) in body.basic_blocks.iter().enumerate() { + if let TerminatorKind::Return = bb.terminator().kind { + // Switch the cursor to the end of the block ending with `Return`. + cursor.seek_to_block_end(idx.into()); + // Retrieve the dataflow state and join into the results graph. + results.join(&cursor.get().clone()); + } + } + results + } +} + +impl<'a, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'tcx> { + /// Dataflow state at each instruction. + type Domain = PointsToGraph<'tcx>; + + type Direction = Forward; + + const NAME: &'static str = "PointsToAnalysis"; + + /// Dataflow state instantiated at the beginning of each basic block, before the state from + /// previous basic blocks gets joined into it. + fn bottom_value(&self, _body: &Body<'tcx>) -> Self::Domain { + PointsToGraph::empty() + } + + /// Dataflow state instantiated at the entry into the body; this should be the initial dataflow + /// graph. + fn initialize_start_block(&self, _body: &Body<'tcx>, state: &mut Self::Domain) { + state.join(&self.initial_graph.clone()); + } +} + +impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { + /// Update current dataflow state based on the information we can infer from the given + /// statement. + fn apply_statement_effect( + &mut self, + state: &mut Self::Domain, + statement: &Statement<'tcx>, + _location: Location, + ) { + // The only two statements that can introduce new aliasing information are assignments and + // copies using `copy_nonoverlapping`. + match &statement.kind { + StatementKind::Assign(assign_box) => { + let (place, rvalue) = *assign_box.clone(); + // Resolve all dereference projections for the lvalue. + let lvalue_set = state.resolve_place(place, self.instance); + // Determine all places rvalue could point to. 
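+                // E.g., for `a = &b` the rvalue set is `{b}`, so the edge `a --> b` gets
+                // added below; for `a = b` it is the set of all pointees of `b`
+                // (cf. the `PointsToGraph` docs).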
+ let rvalue_set = self.successors_for_rvalue(state, rvalue); + // Create an edge between all places which could be lvalue and all places rvalue + // could be pointing to. + state.extend(&lvalue_set, &rvalue_set); + } + StatementKind::Intrinsic(non_diverging_intrinsic) => { + match *non_diverging_intrinsic.clone() { + NonDivergingIntrinsic::CopyNonOverlapping(copy_nonoverlapping) => { + // Copy between the values pointed by `*const a` and `*mut b` is + // semantically equivalent to *b = *a with respect to aliasing. + self.apply_copy_effect( + state, + copy_nonoverlapping.src.clone(), + copy_nonoverlapping.dst.clone(), + ); + } + NonDivergingIntrinsic::Assume(..) => { /* This is a no-op. */ } + } + } + StatementKind::FakeRead(..) + | StatementKind::SetDiscriminant { .. } + | StatementKind::Deinit(..) + | StatementKind::StorageLive(..) + | StatementKind::StorageDead(..) + | StatementKind::Retag(..) + | StatementKind::PlaceMention(..) + | StatementKind::AscribeUserType(..) + | StatementKind::Coverage(..) + | StatementKind::ConstEvalCounter + | StatementKind::Nop => { /* This is a no-op with regard to aliasing. */ } + } + } + + fn apply_terminator_effect<'mir>( + &mut self, + state: &mut Self::Domain, + terminator: &'mir Terminator<'tcx>, + location: Location, + ) -> TerminatorEdges<'mir, 'tcx> { + if let TerminatorKind::Call { func, args, destination, .. } = &terminator.kind { + // Attempt to resolve callee. For now, we panic if the callee cannot be resolved (e.g., + // if a function pointer call is used), but we could leverage the call graph to resolve + // it. + let instance = match try_resolve_instance(self.body, func, self.tcx) { + Ok(instance) => instance, + Err(reason) => { + unimplemented!("{reason}") + } + }; + match instance.def { + // Intrinsics could introduce aliasing edges we care about, so need to handle them. + InstanceKind::Intrinsic(def_id) => { + match self.tcx.intrinsic(def_id).unwrap().name.to_string().as_str() { + name if name.starts_with("atomic") => { + match name { + // All `atomic_cxchg` intrinsics take `dst, old, src` as arguments. + // This is equivalent to `destination = *dst; *dst = src`. + name if name.starts_with("atomic_cxchg") => { + assert_eq!( + args.len(), + 3, + "Unexpected number of arguments for `{name}`" + ); + assert!(matches!( + args[0].node.ty(self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + let src_set = + self.successors_for_operand(state, args[2].node.clone()); + let dst_set = + self.successors_for_deref(state, args[0].node.clone()); + let destination_set = + state.resolve_place(*destination, self.instance); + state.extend(&destination_set, &state.successors(&dst_set)); + state.extend(&dst_set, &src_set); + } + // All `atomic_load` intrinsics take `src` as an argument. + // This is equivalent to `destination = *src`. + name if name.starts_with("atomic_load") => { + assert_eq!( + args.len(), + 1, + "Unexpected number of arguments for `{name}`" + ); + assert!(matches!( + args[0].node.ty(self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Not) + )); + let src_set = + self.successors_for_deref(state, args[0].node.clone()); + let destination_set = + state.resolve_place(*destination, self.instance); + state.extend(&destination_set, &state.successors(&src_set)); + } + // All `atomic_store` intrinsics take `dst, val` as arguments. + // This is equivalent to `*dst = val`. 
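+                            // Aliasing-wise this only adds edges from the pointees of `dst`
+                            // to the pointees of `val`, mirroring a plain `*a = b` store.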
+ name if name.starts_with("atomic_store") => { + assert_eq!( + args.len(), + 2, + "Unexpected number of arguments for `{name}`" + ); + assert!(matches!( + args[0].node.ty(self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + let dst_set = + self.successors_for_deref(state, args[0].node.clone()); + let val_set = + self.successors_for_operand(state, args[1].node.clone()); + state.extend(&dst_set, &val_set); + } + // All other `atomic` intrinsics take `dst, src` as arguments. + // This is equivalent to `destination = *dst; *dst = src`. + _ => { + assert_eq!( + args.len(), + 2, + "Unexpected number of arguments for `{name}`" + ); + assert!(matches!( + args[0].node.ty(self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + let src_set = + self.successors_for_operand(state, args[1].node.clone()); + let dst_set = + self.successors_for_deref(state, args[0].node.clone()); + let destination_set = + state.resolve_place(*destination, self.instance); + state.extend(&destination_set, &state.successors(&dst_set)); + state.extend(&dst_set, &src_set); + } + }; + } + // Similar to `copy_nonoverlapping`, argument order is `src`, `dst`, `count`. + "copy" => { + assert_eq!(args.len(), 3, "Unexpected number of arguments for `copy`"); + assert!(matches!( + args[0].node.ty(self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Not) + )); + assert!(matches!( + args[1].node.ty(self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + self.apply_copy_effect( + state, + args[0].node.clone(), + args[1].node.clone(), + ); + } + // Similar to `copy_nonoverlapping`, argument order is `dst`, `src`, `count`. + "volatile_copy_memory" | "volatile_copy_nonoverlapping_memory" => { + assert_eq!(args.len(), 3, "Unexpected number of arguments for `copy`"); + assert!(matches!( + args[0].node.ty(self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + assert!(matches!( + args[1].node.ty(self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Not) + )); + self.apply_copy_effect( + state, + args[1].node.clone(), + args[0].node.clone(), + ); + } + // Semantically equivalent to dest = *a + "volatile_load" | "unaligned_volatile_load" => { + assert_eq!( + args.len(), + 1, + "Unexpected number of arguments for `volatile_load`" + ); + assert!(matches!( + args[0].node.ty(self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Not) + )); + // Destination of the return value. + let lvalue_set = state.resolve_place(*destination, self.instance); + let rvalue_set = self.successors_for_deref(state, args[0].node.clone()); + state.extend(&lvalue_set, &state.successors(&rvalue_set)); + } + // Semantically equivalent *a = b. + "volatile_store" | "unaligned_volatile_store" => { + assert_eq!( + args.len(), + 2, + "Unexpected number of arguments for `volatile_store`" + ); + assert!(matches!( + args[0].node.ty(self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + let lvalue_set = self.successors_for_deref(state, args[0].node.clone()); + let rvalue_set = + self.successors_for_operand(state, args[1].node.clone()); + state.extend(&lvalue_set, &rvalue_set); + } + _ => { + // TODO: this probably does not handle all relevant intrinsics, so more + // need to be added. 
For more information, see: + // https://github.com/model-checking/kani/issues/3300 + if self.tcx.is_mir_available(def_id) { + self.apply_regular_call_effect(state, instance, args, destination); + } + } + } + } + _ => { + if self.tcx.is_foreign_item(instance.def_id()) { + match self + .tcx + .def_path_str_with_args(instance.def_id(), instance.args) + .as_str() + { + // This is an internal function responsible for heap allocation, + // which creates a new node we need to add to the points-to graph. + "alloc::alloc::__rust_alloc" | "alloc::alloc::__rust_alloc_zeroed" => { + let lvalue_set = state.resolve_place(*destination, self.instance); + let rvalue_set = HashSet::from([MemLoc::new_heap_allocation( + self.instance, + location, + )]); + state.extend(&lvalue_set, &rvalue_set); + } + _ => {} + } + } else { + // Otherwise, handle this as a regular function call. + self.apply_regular_call_effect(state, instance, args, destination); + } + } + } + }; + terminator.edges() + } + + /// We don't care about this and just need to implement this to implement the trait. + fn apply_call_return_effect( + &mut self, + _state: &mut Self::Domain, + _block: BasicBlock, + _return_places: CallReturnPlaces<'_, 'tcx>, + ) { + } +} + +/// Try retrieving instance for the given function operand. +fn try_resolve_instance<'tcx>( + body: &Body<'tcx>, + func: &Operand<'tcx>, + tcx: TyCtxt<'tcx>, +) -> Result, String> { + let ty = func.ty(body, tcx); + match ty.kind() { + TyKind::FnDef(def, args) => { + // Span here is used for error-reporting, which we don't expect to encounter anyway, so + // it is ok to use a dummy. + Ok(Instance::expect_resolve(tcx, ParamEnv::reveal_all(), *def, &args, DUMMY_SP)) + } + _ => Err(format!( + "Kani was not able to resolve the instance of the function operand `{ty:?}`. Currently, memory initialization checks in presence of function pointers and vtable calls are not supported. For more information about planned support, see https://github.com/model-checking/kani/issues/3300." + )), + } +} + +impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { + /// Update the analysis state according to the operation, which is semantically equivalent to `*to = *from`. + fn apply_copy_effect( + &self, + state: &mut PointsToGraph<'tcx>, + from: Operand<'tcx>, + to: Operand<'tcx>, + ) { + let lvalue_set = self.successors_for_deref(state, to); + let rvalue_set = self.successors_for_deref(state, from); + state.extend(&lvalue_set, &state.successors(&rvalue_set)); + } + + /// Find all places where the operand could point to at the current stage of the program. + fn successors_for_operand( + &self, + state: &mut PointsToGraph<'tcx>, + operand: Operand<'tcx>, + ) -> HashSet> { + match operand { + Operand::Copy(place) | Operand::Move(place) => { + // Find all places which are pointed to by the place. + state.successors(&state.resolve_place(place, self.instance)) + } + Operand::Constant(const_operand) => { + // Constants could point to a static, so need to check for that. + if let Some(static_def_id) = const_operand.check_static_ptr(self.tcx) { + HashSet::from([MemLoc::new_static_allocation(static_def_id)]) + } else { + HashSet::new() + } + } + } + } + + /// Find all places where the deref of the operand could point to at the current stage of the program. 
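+    ///
+    /// Concretely, for an operand `p` this resolves the place `*p`: resolve `p` to its node
+    /// and take one step along the outgoing edges of the graph.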
+ fn successors_for_deref( + &self, + state: &mut PointsToGraph<'tcx>, + operand: Operand<'tcx>, + ) -> HashSet> { + match operand { + Operand::Copy(place) | Operand::Move(place) => state.resolve_place( + place.project_deeper(&[ProjectionElem::Deref], self.tcx), + self.instance, + ), + Operand::Constant(const_operand) => { + // Constants could point to a static, so need to check for that. + if let Some(static_def_id) = const_operand.check_static_ptr(self.tcx) { + HashSet::from([MemLoc::new_static_allocation(static_def_id)]) + } else { + HashSet::new() + } + } + } + } + + /// Update the analysis state according to the regular function call. + fn apply_regular_call_effect( + &mut self, + state: &mut PointsToGraph<'tcx>, + instance: Instance<'tcx>, + args: &[Spanned>], + destination: &Place<'tcx>, + ) { + // Here we simply call another function, so need to retrieve internal body for it. + let new_body = { + let stable_instance = rustc_internal::stable(instance); + let stable_body = stable_instance.body().unwrap(); + stable_body.internal_mir(self.tcx) + }; + + // In order to be efficient, create a new graph for the function call analysis, which only + // contains arguments and statics and anything transitively reachable from them. + let mut initial_graph = PointsToGraph::empty(); + for arg in args.iter() { + match arg.node { + Operand::Copy(place) | Operand::Move(place) => { + initial_graph + .join(&state.transitive_closure(state.resolve_place(place, self.instance))); + } + Operand::Constant(_) => {} + } + } + + // A missing link is the connections between the arguments in the caller and parameters in + // the callee, add it to the graph. + if self.tcx.is_closure_like(instance.def.def_id()) { + // This means we encountered a closure call. + // Sanity check. The first argument is the closure itself and the second argument is the tupled arguments from the caller. + assert!(args.len() == 2); + // First, connect all upvars. + let lvalue_set = HashSet::from([MemLoc::new_stack_allocation( + instance, + Place { local: 1usize.into(), projection: List::empty() }, + )]); + let rvalue_set = self.successors_for_operand(state, args[0].node.clone()); + initial_graph.extend(&lvalue_set, &rvalue_set); + // Then, connect the argument tuple to each of the spread arguments. + let spread_arg_operand = args[1].node.clone(); + for i in 0..new_body.arg_count { + let lvalue_set = HashSet::from([MemLoc::new_stack_allocation( + instance, + Place { + local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. + projection: List::empty(), + }, + )]); + // This conservatively assumes all arguments alias to all parameters. + let rvalue_set = self.successors_for_operand(state, spread_arg_operand.clone()); + initial_graph.extend(&lvalue_set, &rvalue_set); + } + } else { + // Otherwise, simply connect all arguments to parameters. + for (i, arg) in args.iter().enumerate() { + let lvalue_set = HashSet::from([MemLoc::new_stack_allocation( + instance, + Place { + local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. + projection: List::empty(), + }, + )]); + let rvalue_set = self.successors_for_operand(state, arg.node.clone()); + initial_graph.extend(&lvalue_set, &rvalue_set); + } + } + + // Run the analysis. + let new_result = + PointsToAnalysis::run(&new_body, self.tcx, instance, self.call_graph, initial_graph); + // Merge the results into the current state. + state.join(&new_result); + + // Connect the return value to the return destination. 
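+        // Local `_0` is the return place in MIR, so everything the callee's `_0` may point
+        // to flows into the caller's destination.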
+ let lvalue_set = state.resolve_place(*destination, self.instance); + let rvalue_set = HashSet::from([MemLoc::new_stack_allocation( + instance, + Place { local: 0usize.into(), projection: List::empty() }, + )]); + state.extend(&lvalue_set, &state.successors(&rvalue_set)); + } + + /// Find all places where the rvalue could point to at the current stage of the program. + fn successors_for_rvalue( + &self, + state: &mut PointsToGraph<'tcx>, + rvalue: Rvalue<'tcx>, + ) -> HashSet> { + match rvalue { + // Using the operand unchanged requires determining where it could point, which + // `successors_for_operand` does. + Rvalue::Use(operand) + | Rvalue::ShallowInitBox(operand, _) + | Rvalue::Cast(_, operand, _) + | Rvalue::Repeat(operand, ..) => self.successors_for_operand(state, operand), + Rvalue::Ref(_, _, ref_place) | Rvalue::AddressOf(_, ref_place) => { + // Here, a reference to a place is created, which leaves the place + // unchanged. + state.resolve_place(ref_place, self.instance) + } + Rvalue::BinaryOp(bin_op, operands) => { + match bin_op { + BinOp::Offset => { + // Offsetting a pointer should still be within the boundaries of the + // same object, so we can simply use the operand unchanged. + let (ptr, _) = *operands.clone(); + self.successors_for_operand(state, ptr) + } + BinOp::Add + | BinOp::AddUnchecked + | BinOp::AddWithOverflow + | BinOp::Sub + | BinOp::SubUnchecked + | BinOp::SubWithOverflow + | BinOp::Mul + | BinOp::MulUnchecked + | BinOp::MulWithOverflow + | BinOp::Div + | BinOp::Rem + | BinOp::BitXor + | BinOp::BitAnd + | BinOp::BitOr + | BinOp::Shl + | BinOp::ShlUnchecked + | BinOp::Shr + | BinOp::ShrUnchecked => { + // While unlikely, those could be pointer addresses, so we need to + // track them. We assume that even shifted addresses will be within + // the same original object. + let (l_operand, r_operand) = *operands.clone(); + let l_operand_set = self.successors_for_operand(state, l_operand); + let r_operand_set = self.successors_for_operand(state, r_operand); + l_operand_set.union(&r_operand_set).cloned().collect() + } + BinOp::Eq + | BinOp::Lt + | BinOp::Le + | BinOp::Ne + | BinOp::Ge + | BinOp::Gt + | BinOp::Cmp => { + // None of those could yield an address as the result. + HashSet::new() + } + } + } + Rvalue::UnaryOp(_, operand) => { + // The same story from BinOp applies here, too. Need to track those things. + self.successors_for_operand(state, operand) + } + Rvalue::Len(..) | Rvalue::NullaryOp(..) | Rvalue::Discriminant(..) => { + // All of those should yield a constant. + HashSet::new() + } + Rvalue::Aggregate(_, operands) => { + // Conservatively find a union of all places mentioned here and resolve + // their pointees. + operands + .into_iter() + .flat_map(|operand| self.successors_for_operand(state, operand)) + .collect() + } + Rvalue::CopyForDeref(place) => { + // Resolve pointees of a place. + state.successors(&state.resolve_place(place, self.instance)) + } + Rvalue::ThreadLocalRef(def_id) => { + // We store a def_id of a static. + HashSet::from([MemLoc::new_static_allocation(def_id)]) + } + } + } +} diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs new file mode 100644 index 000000000000..d2e80f24c737 --- /dev/null +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -0,0 +1,222 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Graph data structure to store the results of points-to analysis. 
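+//!
+//! Nodes are memory locations (stack places, heap allocation sites, or statics), and a
+//! directed edge `a --> b` means that `a` may point to `b`.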
+ +use rustc_hir::def_id::DefId; +use rustc_middle::{ + mir::{Location, Place, ProjectionElem}, + ty::{Instance, List, TyCtxt}, +}; +use rustc_mir_dataflow::{fmt::DebugWithContext, JoinSemiLattice}; +use rustc_smir::rustc_internal; +use stable_mir::mir::{ + mono::{Instance as StableInstance, StaticDef}, + Place as StablePlace, +}; +use std::collections::{HashMap, HashSet, VecDeque}; + +/// A node in the points-to graph, which could be a place on the stack, a heap allocation, or a static. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub enum MemLoc<'tcx> { + /// Notice that the type of `Place` here is not restricted to references or pointers. For + /// example, we propagate aliasing information for values derived from casting a pointer to a + /// usize in order to ensure soundness, as it could later be casted back to a pointer. + Stack(Instance<'tcx>, Place<'tcx>), + /// Using a combination of the instance of the function where the allocation took place and the + /// location of the allocation inside this function implements allocation-site abstraction. + Heap(Instance<'tcx>, Location), + Static(DefId), +} + +impl<'tcx> MemLoc<'tcx> { + /// Create a memory location representing a new heap allocation site. + pub fn new_heap_allocation(instance: Instance<'tcx>, location: Location) -> Self { + MemLoc::Heap(instance, location) + } + + /// Create a memory location representing a new stack allocation. + pub fn new_stack_allocation(instance: Instance<'tcx>, place: Place<'tcx>) -> Self { + MemLoc::Stack(instance, place) + } + + /// Create a memory location representing a new static allocation. + pub fn new_static_allocation(static_def: DefId) -> Self { + MemLoc::Static(static_def) + } + + /// Create a memory location representing a new stack allocation from StableMIR values. + pub fn from_stable_stack_allocation( + instance: StableInstance, + place: StablePlace, + tcx: TyCtxt<'tcx>, + ) -> Self { + let internal_instance = rustc_internal::internal(tcx, instance); + let internal_place = rustc_internal::internal(tcx, place); + Self::new_stack_allocation(internal_instance, internal_place) + } + + /// Create a memory location representing a new static allocation from StableMIR values. + pub fn from_stable_static_allocation(static_def: StaticDef, tcx: TyCtxt<'tcx>) -> Self { + let static_def_id = rustc_internal::internal(tcx, static_def); + Self::new_static_allocation(static_def_id) + } +} + +/// Graph data structure that stores the current results of the point-to analysis. The graph is +/// directed, so having an edge between two places means that one is pointing to the other. +/// +/// For example: +/// - `a = &b` would translate to `a --> b` +/// - `a = b` would translate to `a --> {all pointees of b}` (if `a` and `b` are pointers / +/// references) +/// +/// Note that the aliasing is not field-sensitive, since the nodes in the graph are places with no +/// projections, which is sound but can be imprecise. +/// +/// For example: +/// ``` +/// let ref_pair = (&a, &b); // Will add `ref_pair --> (a | b)` edges into the graph. +/// let first = ref_pair.0; // Will add `first -> (a | b)`, which is an overapproximation. +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct PointsToGraph<'tcx> { + /// A hash map of node --> {nodes} edges. + edges: HashMap, HashSet>>, +} + +impl<'tcx> PointsToGraph<'tcx> { + pub fn empty() -> Self { + Self { edges: HashMap::new() } + } + + /// Collect all nodes which have incoming edges from `nodes`. 
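+    ///
+    /// For example, with edges `{a --> b, a --> c, d --> e}`, `successors({a, d})` returns
+    /// `{b, c, e}` (an illustrative sketch; the names are arbitrary).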
+ pub fn successors(&self, nodes: &HashSet>) -> HashSet> { + nodes.iter().flat_map(|node| self.edges.get(node).cloned().unwrap_or_default()).collect() + } + + /// For each node in `from`, add an edge to each node in `to`. + pub fn extend(&mut self, from: &HashSet>, to: &HashSet>) { + for node in from.iter() { + let node_pointees = self.edges.entry(*node).or_default(); + node_pointees.extend(to.iter()); + } + } + + /// Collect all places to which a given place can alias. + /// + /// We automatically resolve dereference projections here (by finding successors for each + /// dereference projection we encounter), which is valid as long as we do it for every place we + /// add to the graph. + pub fn resolve_place( + &self, + place: Place<'tcx>, + instance: Instance<'tcx>, + ) -> HashSet> { + let place_without_projections = Place { local: place.local, projection: List::empty() }; + let mut node_set = + HashSet::from([MemLoc::new_stack_allocation(instance, place_without_projections)]); + for projection in place.projection { + match projection { + ProjectionElem::Deref => { + node_set = self.successors(&node_set); + } + ProjectionElem::Field(..) + | ProjectionElem::Index(..) + | ProjectionElem::ConstantIndex { .. } + | ProjectionElem::Subslice { .. } + | ProjectionElem::Downcast(..) + | ProjectionElem::OpaqueCast(..) + | ProjectionElem::Subtype(..) => { + /* There operations are no-ops w.r.t aliasing since we are tracking it on per-object basis. */ + } + } + } + node_set + } + + /// Stable interface for `resolve_place`. + pub fn resolve_place_stable( + &self, + place: StablePlace, + instance: StableInstance, + tcx: TyCtxt<'tcx>, + ) -> HashSet> { + let internal_place = rustc_internal::internal(tcx, place); + let internal_instance = rustc_internal::internal(tcx, instance); + self.resolve_place(internal_place, internal_instance) + } + + /// Dump the graph into a file using the graphviz format for later visualization. + pub fn dump(&self, file_path: &str) { + let mut nodes: Vec = + self.edges.keys().map(|from| format!("\t\"{:?}\"", from)).collect(); + nodes.sort(); + let nodes_str = nodes.join("\n"); + + let mut edges: Vec = self + .edges + .iter() + .flat_map(|(from, to)| { + let from = format!("\"{:?}\"", from); + to.iter().map(move |to| { + let to = format!("\"{:?}\"", to); + format!("\t{} -> {}", from.clone(), to) + }) + }) + .collect(); + edges.sort(); + let edges_str = edges.join("\n"); + + std::fs::write(file_path, format!("digraph {{\n{}\n{}\n}}", nodes_str, edges_str)).unwrap(); + } + + /// Find a transitive closure of the graph starting from a set of given locations; this also + /// includes statics. + pub fn transitive_closure(&self, targets: HashSet>) -> PointsToGraph<'tcx> { + let mut result = PointsToGraph::empty(); + // Working queue. + let mut queue = VecDeque::from_iter(targets); + // Add all statics, as they can be accessed at any point. + let statics = self.edges.keys().filter(|node| matches!(node, MemLoc::Static(_))); + queue.extend(statics); + // Add all entries. + while let Some(next_target) = queue.pop_front() { + result.edges.entry(next_target).or_insert_with(|| { + let outgoing_edges = + self.edges.get(&next_target).cloned().unwrap_or(HashSet::new()); + queue.extend(outgoing_edges.iter()); + outgoing_edges.clone() + }); + } + result + } + + /// Retrieve all places to which a given place is pointing to. 
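+    ///
+    /// This is the single-node counterpart of `successors`; a node with no outgoing edges
+    /// yields an empty set.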
+ pub fn pointees_of(&self, target: &MemLoc<'tcx>) -> HashSet> { + self.edges.get(&target).unwrap_or(&HashSet::new()).clone() + } +} + +/// Since we are performing the analysis using a dataflow, we need to implement a proper monotonous +/// join operation. In our case, this is a simple union of two graphs. This "lattice" is finite, +/// because in the worst case all places will alias to all places, in which case the join will be a +/// no-op. +impl<'tcx> JoinSemiLattice for PointsToGraph<'tcx> { + fn join(&mut self, other: &Self) -> bool { + let mut updated = false; + // Check every node in the other graph. + for (from, to) in other.edges.iter() { + let existing_to = self.edges.entry(*from).or_default(); + let initial_size = existing_to.len(); + existing_to.extend(to); + let new_size = existing_to.len(); + updated |= initial_size != new_size; + } + updated + } +} + +/// This is a requirement for the fixpoint solver, and there is no derive macro for this, so +/// implement it manually. +impl<'tcx, C> DebugWithContext for PointsToGraph<'tcx> {} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs new file mode 100644 index 000000000000..11ac412703ae --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs @@ -0,0 +1,152 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! This module contains the visitor responsible for collecting initial analysis targets for delayed +//! UB instrumentation. + +use crate::kani_middle::transform::check_uninit::ty_layout::tys_layout_equal_to_size; +use stable_mir::{ + mir::{ + alloc::GlobalAlloc, + mono::{Instance, InstanceKind, StaticDef}, + visit::Location, + Body, CastKind, LocalDecl, MirVisitor, Mutability, NonDivergingIntrinsic, Operand, Place, + Rvalue, Statement, StatementKind, Terminator, TerminatorKind, + }, + ty::{ConstantKind, RigidTy, TyKind}, +}; + +/// Pointer, write through which might trigger delayed UB. +pub enum AnalysisTarget { + Place(Place), + Static(StaticDef), +} + +/// Visitor that finds initial analysis targets for delayed UB instrumentation. For our purposes, +/// analysis targets are *pointers* to places reading and writing from which should be tracked. +pub struct InitialTargetVisitor { + body: Body, + targets: Vec, +} + +impl InitialTargetVisitor { + pub fn new(body: Body) -> Self { + Self { body, targets: vec![] } + } + + pub fn into_targets(self) -> Vec { + self.targets + } + + pub fn push_operand(&mut self, operand: &Operand) { + match operand { + Operand::Copy(place) | Operand::Move(place) => { + self.targets.push(AnalysisTarget::Place(place.clone())); + } + Operand::Constant(constant) => { + // Extract the static from the constant. + if let ConstantKind::Allocated(allocation) = constant.const_.kind() { + for (_, prov) in &allocation.provenance.ptrs { + if let GlobalAlloc::Static(static_def) = GlobalAlloc::from(prov.0) { + self.targets.push(AnalysisTarget::Static(static_def)); + }; + } + } + } + } + } +} + +/// We implement MirVisitor to facilitate target finding, we look for: +/// - pointer casts where pointees have different padding; +/// - calls to `copy`-like intrinsics. 
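+///
+/// As an illustrative (hypothetical) trigger, casting between pointee types whose padding
+/// differs makes the cast operand an analysis target:
+///
+/// ```ignore
+/// let pair = (0u8, 0u32); // `(u8, u32)` contains padding bytes.
+/// let ptr = &pair as *const (u8, u32) as *const [u8; 8]; // `[u8; 8]` has none => target.
+/// ```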
+impl MirVisitor for InitialTargetVisitor { + fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) { + if let Rvalue::Cast(kind, operand, ty) = rvalue { + let operand_ty = operand.ty(self.body.locals()).unwrap(); + match kind { + CastKind::Transmute | CastKind::PtrToPtr => { + let operand_ty_kind = operand_ty.kind(); + let from_ty = match operand_ty_kind.rigid().unwrap() { + RigidTy::RawPtr(ty, _) | RigidTy::Ref(_, ty, _) => Some(ty), + _ => None, + }; + let ty_kind = ty.kind(); + let to_ty = match ty_kind.rigid().unwrap() { + RigidTy::RawPtr(ty, _) | RigidTy::Ref(_, ty, _) => Some(ty), + _ => None, + }; + if let (Some(from_ty), Some(to_ty)) = (from_ty, to_ty) { + if !tys_layout_equal_to_size(from_ty, to_ty) { + self.push_operand(operand); + } + } + } + _ => {} + }; + } + self.super_rvalue(rvalue, location); + } + + fn visit_statement(&mut self, stmt: &Statement, location: Location) { + if let StatementKind::Intrinsic(NonDivergingIntrinsic::CopyNonOverlapping(copy)) = + &stmt.kind + { + self.push_operand(©.dst); + } + self.super_statement(stmt, location); + } + + fn visit_terminator(&mut self, term: &Terminator, location: Location) { + if let TerminatorKind::Call { func, args, .. } = &term.kind { + let instance = try_resolve_instance(self.body.locals(), func).unwrap(); + if instance.kind == InstanceKind::Intrinsic { + match instance.intrinsic_name().unwrap().as_str() { + "copy" => { + assert_eq!(args.len(), 3, "Unexpected number of arguments for `copy`"); + assert!(matches!( + args[0].ty(self.body.locals()).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + assert!(matches!( + args[1].ty(self.body.locals()).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + // Here, `dst` is the second argument. + self.push_operand(&args[1]); + } + "volatile_copy_memory" | "volatile_copy_nonoverlapping_memory" => { + assert_eq!( + args.len(), + 3, + "Unexpected number of arguments for `volatile_copy`" + ); + assert!(matches!( + args[0].ty(self.body.locals()).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + assert!(matches!( + args[1].ty(self.body.locals()).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + // Here, `dst` is the first argument. + self.push_operand(&args[0]); + } + _ => {} + } + } + } + self.super_terminator(term, location); + } +} + +/// Try retrieving instance for the given function operand. +fn try_resolve_instance(locals: &[LocalDecl], func: &Operand) -> Result { + let ty = func.ty(locals).unwrap(); + match ty.kind() { + TyKind::RigidTy(RigidTy::FnDef(def, args)) => Ok(Instance::resolve(def, &args).unwrap()), + _ => Err(format!( + "Kani was not able to resolve the instance of the function operand `{ty:?}`. Currently, memory initialization checks in presence of function pointers and vtable calls are not supported. For more information about planned support, see https://github.com/model-checking/kani/issues/3300." + )), + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs new file mode 100644 index 000000000000..f295fc76d4bf --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -0,0 +1,137 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! 
Visitor that collects all instructions relevant to uninitialized memory access caused by delayed +//! UB. In practice, that means collecting all instructions where the place is featured. + +use crate::kani_middle::{ + points_to::{MemLoc, PointsToGraph}, + transform::{ + body::{InsertPosition, MutableBody, SourceInstruction}, + check_uninit::{ + relevant_instruction::{InitRelevantInstruction, MemoryInitOp}, + TargetFinder, + }, + }, +}; +use rustc_middle::ty::TyCtxt; +use stable_mir::mir::{ + mono::Instance, + visit::{Location, PlaceContext}, + BasicBlockIdx, MirVisitor, Operand, Place, Rvalue, Statement, Terminator, +}; +use std::collections::HashSet; + +pub struct InstrumentationVisitor<'a, 'tcx> { + /// Whether we should skip the next instruction, since it might've been instrumented already. + /// When we instrument an instruction, we partition the basic block, and the instruction that + /// may trigger UB becomes the first instruction of the basic block, which we need to skip + /// later. + skip_next: bool, + /// The instruction being visited at a given point. + current: SourceInstruction, + /// The target instruction that should be verified. + pub target: Option, + /// Aliasing analysis data. + points_to: &'a PointsToGraph<'tcx>, + /// The list of places we should be looking for, ignoring others + analysis_targets: &'a HashSet>, + current_instance: Instance, + tcx: TyCtxt<'tcx>, +} + +impl<'a, 'tcx> TargetFinder for InstrumentationVisitor<'a, 'tcx> { + fn find_next( + &mut self, + body: &MutableBody, + bb: BasicBlockIdx, + skip_first: bool, + ) -> Option { + self.skip_next = skip_first; + self.current = SourceInstruction::Statement { idx: 0, bb }; + self.target = None; + self.visit_basic_block(&body.blocks()[bb]); + self.target.clone() + } +} + +impl<'a, 'tcx> InstrumentationVisitor<'a, 'tcx> { + pub fn new( + points_to: &'a PointsToGraph<'tcx>, + analysis_targets: &'a HashSet>, + current_instance: Instance, + tcx: TyCtxt<'tcx>, + ) -> Self { + Self { + skip_next: false, + current: SourceInstruction::Statement { idx: 0, bb: 0 }, + target: None, + points_to, + analysis_targets, + current_instance, + tcx, + } + } + fn push_target(&mut self, source_op: MemoryInitOp) { + let target = self.target.get_or_insert_with(|| InitRelevantInstruction { + source: self.current, + after_instruction: vec![], + before_instruction: vec![], + }); + target.push_operation(source_op); + } +} + +impl<'a, 'tcx> MirVisitor for InstrumentationVisitor<'a, 'tcx> { + fn visit_statement(&mut self, stmt: &Statement, location: Location) { + if self.skip_next { + self.skip_next = false; + } else if self.target.is_none() { + // Check all inner places. + self.super_statement(stmt, location); + } + // Switch to the next statement. + let SourceInstruction::Statement { idx, bb } = self.current else { unreachable!() }; + self.current = SourceInstruction::Statement { idx: idx + 1, bb }; + } + + fn visit_terminator(&mut self, term: &Terminator, location: Location) { + if !(self.skip_next || self.target.is_some()) { + self.current = SourceInstruction::Terminator { bb: self.current.bb() }; + self.super_terminator(term, location); + } + } + + fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) { + match rvalue { + Rvalue::AddressOf(..) | Rvalue::Ref(..) => { + // These operations are always legitimate for us. 
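+                // Taking `&` or `&raw` of a place reads no bytes from it, so there is
+                // nothing to check and no reason to descend into the inner place.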
+ } + _ => self.super_rvalue(rvalue, location), + } + } + + fn visit_place(&mut self, place: &Place, ptx: PlaceContext, location: Location) { + // Match the place by whatever it is pointing to and find an intersection with the targets. + if self + .points_to + .resolve_place_stable(place.clone(), self.current_instance, self.tcx) + .intersection(&self.analysis_targets) + .next() + .is_some() + { + // If we are mutating the place, initialize it. + if ptx.is_mutating() { + self.push_target(MemoryInitOp::SetRef { + operand: Operand::Copy(place.clone()), + value: true, + position: InsertPosition::After, + }); + } else { + // Otherwise, check its initialization. + self.push_target(MemoryInitOp::CheckRef { operand: Operand::Copy(place.clone()) }); + } + } + self.super_place(place, ptx, location) + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs new file mode 100644 index 000000000000..6b488569813f --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -0,0 +1,139 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Global transformation pass that injects checks that catch delayed UB caused by uninitialized memory. + +use std::collections::HashMap; +use std::collections::HashSet; + +use crate::args::ExtraChecks; +use crate::kani_middle::{ + points_to::{run_points_to_analysis, MemLoc, PointsToGraph}, + reachability::CallGraph, + transform::{ + body::{CheckType, MutableBody}, + check_uninit::UninitInstrumenter, + BodyTransformation, GlobalPass, TransformationResult, + }, +}; +use crate::kani_queries::QueryDb; +use initial_target_visitor::{AnalysisTarget, InitialTargetVisitor}; +use instrumentation_visitor::InstrumentationVisitor; +use rustc_middle::ty::TyCtxt; +use rustc_mir_dataflow::JoinSemiLattice; +use rustc_session::config::OutputType; +use stable_mir::{ + mir::mono::{Instance, MonoItem}, + mir::MirVisitor, + ty::FnDef, +}; + +mod initial_target_visitor; +mod instrumentation_visitor; + +#[derive(Debug)] +pub struct DelayedUbPass { + pub check_type: CheckType, + pub mem_init_fn_cache: HashMap<&'static str, FnDef>, +} + +impl DelayedUbPass { + pub fn new(check_type: CheckType) -> Self { + Self { check_type, mem_init_fn_cache: HashMap::new() } + } +} + +impl GlobalPass for DelayedUbPass { + fn is_enabled(&self, query_db: &QueryDb) -> bool { + let args = query_db.args(); + args.ub_check.contains(&ExtraChecks::Uninit) + } + + fn transform( + &mut self, + tcx: TyCtxt, + call_graph: &CallGraph, + starting_items: &[MonoItem], + instances: Vec, + transformer: &mut BodyTransformation, + ) { + // Collect all analysis targets (pointers to places reading and writing from which should be + // tracked). + let targets: HashSet<_> = instances + .iter() + .flat_map(|instance| { + let body = instance.body().unwrap(); + let mut visitor = InitialTargetVisitor::new(body.clone()); + visitor.visit_body(&body); + // Convert all places into the format of aliasing graph for later comparison. + visitor.into_targets().into_iter().map(move |analysis_target| match analysis_target + { + AnalysisTarget::Place(place) => { + MemLoc::from_stable_stack_allocation(*instance, place, tcx) + } + AnalysisTarget::Static(static_def) => { + MemLoc::from_stable_static_allocation(static_def, tcx) + } + }) + }) + .collect(); + + // Only perform this analysis if there is something to analyze. 
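+        // This saves the cost of the points-to analysis, which recurses into every body
+        // reachable from each harness, whenever no delayed-UB targets exist.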
+ if !targets.is_empty() { + let mut analysis_targets = HashSet::new(); + let mut global_points_to_graph = PointsToGraph::empty(); + // Analyze aliasing for every harness. + for entry_item in starting_items { + // Convert each entry function into instance, if possible. + let entry_fn = match entry_item { + MonoItem::Fn(instance) => Some(*instance), + MonoItem::Static(static_def) => { + let instance: Instance = (*static_def).into(); + instance.has_body().then_some(instance) + } + MonoItem::GlobalAsm(_) => None, + }; + if let Some(instance) = entry_fn { + let body = instance.body().unwrap(); + let results = run_points_to_analysis(&body, tcx, instance, call_graph); + global_points_to_graph.join(&results); + } + } + + // Since analysis targets are *pointers*, need to get its successors for instrumentation. + for target in targets.iter() { + analysis_targets.extend(global_points_to_graph.pointees_of(target)); + } + + // If we are generating MIR, generate the points-to graph as well. + if tcx.sess.opts.output_types.contains_key(&OutputType::Mir) { + global_points_to_graph.dump("points-to.dot"); + } + + // Instrument each instance based on the final targets we found. + for instance in instances { + let mut instrumenter = UninitInstrumenter { + check_type: self.check_type.clone(), + mem_init_fn_cache: &mut self.mem_init_fn_cache, + }; + // Retrieve the body with all local instrumentation passes applied. + let body = MutableBody::from(transformer.body(tcx, instance)); + // Instrument for delayed UB. + let target_finder = InstrumentationVisitor::new( + &global_points_to_graph, + &analysis_targets, + instance, + tcx, + ); + let (instrumentation_added, body) = + instrumenter.instrument(tcx, body, instance, target_finder); + // If some instrumentation has been performed, update the cached body in the local transformer. + if instrumentation_added { + transformer.cache.entry(instance).and_modify(|transformation_result| { + *transformation_result = TransformationResult::Modified(body.into()); + }); + } + } + } + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs index f46c143f16d1..5c7194f879d1 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs @@ -1,33 +1,44 @@ // Copyright Kani Contributors // SPDX-License-Identifier: Apache-2.0 OR MIT // -//! Implement a transformation pass that instruments the code to detect possible UB due to -//! the accesses to uninitialized memory. +//! Module containing multiple transformation passes that instrument the code to detect possible UB +//! due to the accesses to uninitialized memory. 
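+//!
+//! Currently that means two instrumentation sources: `UninitPass`, which targets accesses
+//! through raw pointers, and `DelayedUbPass`, which targets places that may alias with
+//! sources of delayed UB.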
-use crate::args::ExtraChecks; -use crate::kani_middle::find_fn_def; -use crate::kani_middle::transform::body::{ - CheckType, InsertPosition, MutableBody, SourceInstruction, +use crate::kani_middle::{ + find_fn_def, + transform::body::{CheckType, InsertPosition, MutableBody, SourceInstruction}, }; -use crate::kani_middle::transform::{TransformPass, TransformationType}; -use crate::kani_queries::QueryDb; +use relevant_instruction::{InitRelevantInstruction, MemoryInitOp}; use rustc_middle::ty::TyCtxt; use rustc_smir::rustc_internal; -use stable_mir::mir::mono::Instance; -use stable_mir::mir::{AggregateKind, Body, ConstOperand, Mutability, Operand, Place, Rvalue}; -use stable_mir::ty::{ - FnDef, GenericArgKind, GenericArgs, MirConst, RigidTy, Ty, TyConst, TyKind, UintTy, +use stable_mir::{ + mir::{ + mono::Instance, AggregateKind, BasicBlockIdx, ConstOperand, Mutability, Operand, Place, + Rvalue, + }, + ty::{FnDef, GenericArgKind, GenericArgs, MirConst, RigidTy, Ty, TyConst, TyKind, UintTy}, + CrateDef, }; -use stable_mir::CrateDef; use std::collections::{HashMap, HashSet}; -use std::fmt::Debug; -use tracing::{debug, trace}; +pub use delayed_ub::DelayedUbPass; +pub use ptr_uninit::UninitPass; +pub use ty_layout::{PointeeInfo, PointeeLayout}; + +mod delayed_ub; +mod ptr_uninit; +mod relevant_instruction; mod ty_layout; -mod uninit_visitor; -pub use ty_layout::{PointeeInfo, PointeeLayout}; -use uninit_visitor::{CheckUninitVisitor, InitRelevantInstruction, MemoryInitOp}; +/// Trait that the instrumentation target providers must implement to work with the instrumenter. +trait TargetFinder { + fn find_next( + &mut self, + body: &MutableBody, + bb: BasicBlockIdx, + skip_first: bool, + ) -> Option; +} // Function bodies of those functions will not be instrumented as not to cause infinite recursion. const SKIPPED_DIAGNOSTIC_ITEMS: &[&str] = &[ @@ -41,33 +52,24 @@ const SKIPPED_DIAGNOSTIC_ITEMS: &[&str] = &[ "KaniSetStrPtrInitialized", ]; -/// Instrument the code with checks for uninitialized memory. +/// Instruments the code with checks for uninitialized memory, agnostic to the source of targets. #[derive(Debug)] -pub struct UninitPass { +pub struct UninitInstrumenter<'a> { pub check_type: CheckType, /// Used to cache FnDef lookups of injected memory initialization functions. - pub mem_init_fn_cache: HashMap<&'static str, FnDef>, + pub mem_init_fn_cache: &'a mut HashMap<&'static str, FnDef>, } -impl TransformPass for UninitPass { - fn transformation_type() -> TransformationType - where - Self: Sized, - { - TransformationType::Instrumentation - } - - fn is_enabled(&self, query_db: &QueryDb) -> bool - where - Self: Sized, - { - let args = query_db.args(); - args.ub_check.contains(&ExtraChecks::Uninit) - } - - fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { - trace!(function=?instance.name(), "transform"); - +impl<'a> UninitInstrumenter<'a> { + /// Instrument a body with memory initialization checks, the visitor that generates + /// instrumentation targets must be provided via a TF type parameter. + fn instrument( + &mut self, + tcx: TyCtxt, + mut body: MutableBody, + instance: Instance, + mut target_finder: impl TargetFinder, + ) -> (bool, MutableBody) { // Need to break infinite recursion when memory initialization checks are inserted, so the // internal functions responsible for memory initialization are skipped. 
if tcx @@ -80,13 +82,7 @@ impl TransformPass for UninitPass { return (false, body); } - let mut new_body = MutableBody::from(body); - let orig_len = new_body.blocks().len(); - - // Inject a call to set-up memory initialization state if the function is a harness. - if is_harness(instance, tcx) { - inject_memory_init_setup(&mut new_body, tcx, &mut self.mem_init_fn_cache); - } + let orig_len = body.blocks().len(); // Set of basic block indices for which analyzing first statement should be skipped. // @@ -100,21 +96,19 @@ impl TransformPass for UninitPass { // Do not cache body.blocks().len() since it will change as we add new checks. let mut bb_idx = 0; - while bb_idx < new_body.blocks().len() { + while bb_idx < body.blocks().len() { if let Some(candidate) = - CheckUninitVisitor::find_next(&new_body, bb_idx, skip_first.contains(&bb_idx)) + target_finder.find_next(&body, bb_idx, skip_first.contains(&bb_idx)) { - self.build_check_for_instruction(tcx, &mut new_body, candidate, &mut skip_first); + self.build_check_for_instruction(tcx, &mut body, candidate, &mut skip_first); bb_idx += 1 } else { bb_idx += 1; }; } - (orig_len != new_body.blocks().len(), new_body.into()) + (orig_len != body.blocks().len(), body) } -} -impl UninitPass { /// Inject memory initialization checks for each operation in an instruction. fn build_check_for_instruction( &mut self, @@ -123,7 +117,6 @@ impl UninitPass { instruction: InitRelevantInstruction, skip_first: &mut HashSet, ) { - debug!(?instruction, "build_check"); let mut source = instruction.source; for operation in instruction.before_instruction { self.build_check_for_operation(tcx, body, &mut source, operation, skip_first); @@ -175,7 +168,9 @@ impl UninitPass { }; match operation { - MemoryInitOp::CheckSliceChunk { .. } | MemoryInitOp::Check { .. } => { + MemoryInitOp::CheckSliceChunk { .. } + | MemoryInitOp::Check { .. } + | MemoryInitOp::CheckRef { .. } => { self.build_get_and_check(tcx, body, source, operation, pointee_ty_info, skip_first) } MemoryInitOp::SetSliceChunk { .. } @@ -211,7 +206,7 @@ impl UninitPass { // Depending on whether accessing the known number of elements in the slice, need to // pass is as an argument. let (diagnostic, args) = match &operation { - MemoryInitOp::Check { .. } => { + MemoryInitOp::Check { .. } | MemoryInitOp::CheckRef { .. } => { let diagnostic = "KaniIsPtrInitialized"; let args = vec![ptr_operand.clone(), layout_operand]; (diagnostic, args) @@ -275,14 +270,22 @@ impl UninitPass { // Make sure all non-padding bytes are initialized. collect_skipped(&operation, body, skip_first); - let ptr_operand_ty = ptr_operand.ty(body.locals()).unwrap(); + // Find the real operand type for a good error message. + let operand_ty = match &operation { + MemoryInitOp::Check { operand } + | MemoryInitOp::CheckSliceChunk { operand, .. } + | MemoryInitOp::CheckRef { operand } => operand.ty(body.locals()).unwrap(), + _ => unreachable!(), + }; body.insert_check( tcx, &self.check_type, source, operation.position(), ret_place.local, - &format!("Undefined Behavior: Reading from an uninitialized pointer of type `{ptr_operand_ty}`"), + &format!( + "Undefined Behavior: Reading from an uninitialized pointer of type `{operand_ty}`" + ), ) } @@ -483,59 +486,3 @@ pub fn resolve_mem_init_fn(fn_def: FnDef, layout_size: usize, associated_type: T ) .unwrap() } - -/// Checks if the instance is a harness -- an entry point of Kani analysis. 
-fn is_harness(instance: Instance, tcx: TyCtxt) -> bool { - let harness_identifiers = [ - vec![ - rustc_span::symbol::Symbol::intern("kanitool"), - rustc_span::symbol::Symbol::intern("proof_for_contract"), - ], - vec![ - rustc_span::symbol::Symbol::intern("kanitool"), - rustc_span::symbol::Symbol::intern("proof"), - ], - ]; - harness_identifiers.iter().any(|attr_path| { - tcx.has_attrs_with_path(rustc_internal::internal(tcx, instance.def.def_id()), attr_path) - }) -} - -/// Inject an initial call to set-up memory initialization tracking. -fn inject_memory_init_setup( - new_body: &mut MutableBody, - tcx: TyCtxt, - mem_init_fn_cache: &mut HashMap<&'static str, FnDef>, -) { - // First statement or terminator in the harness. - let mut source = if !new_body.blocks()[0].statements.is_empty() { - SourceInstruction::Statement { idx: 0, bb: 0 } - } else { - SourceInstruction::Terminator { bb: 0 } - }; - - // Dummy return place. - let ret_place = Place { - local: new_body.new_local( - Ty::new_tuple(&[]), - source.span(new_body.blocks()), - Mutability::Not, - ), - projection: vec![], - }; - - // Resolve the instance and inject a call to set-up the memory initialization state. - let memory_initialization_init = Instance::resolve( - get_mem_init_fn_def(tcx, "KaniInitializeMemoryInitializationState", mem_init_fn_cache), - &GenericArgs(vec![]), - ) - .unwrap(); - - new_body.insert_call( - &memory_initialization_init, - &mut source, - InsertPosition::Before, - vec![], - ret_place, - ); -} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs new file mode 100644 index 000000000000..af2753ea7175 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs @@ -0,0 +1,130 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! A transformation pass that instruments the code to detect possible UB due to the accesses to +//! uninitialized memory via raw pointers. + +use crate::args::ExtraChecks; +use crate::kani_middle::transform::{ + body::{CheckType, InsertPosition, MutableBody, SourceInstruction}, + check_uninit::{get_mem_init_fn_def, UninitInstrumenter}, + TransformPass, TransformationType, +}; +use crate::kani_queries::QueryDb; +use rustc_middle::ty::TyCtxt; +use rustc_smir::rustc_internal; +use stable_mir::{ + mir::{mono::Instance, Body, Mutability, Place}, + ty::{FnDef, GenericArgs, Ty}, + CrateDef, +}; +use std::collections::HashMap; +use std::fmt::Debug; +use tracing::trace; +use uninit_visitor::CheckUninitVisitor; + +mod uninit_visitor; + +/// Top-level pass that instruments the code with checks for uninitialized memory access through raw +/// pointers. +#[derive(Debug)] +pub struct UninitPass { + pub check_type: CheckType, + pub mem_init_fn_cache: HashMap<&'static str, FnDef>, +} + +impl TransformPass for UninitPass { + fn transformation_type() -> TransformationType + where + Self: Sized, + { + TransformationType::Instrumentation + } + + fn is_enabled(&self, query_db: &QueryDb) -> bool + where + Self: Sized, + { + let args = query_db.args(); + args.ub_check.contains(&ExtraChecks::Uninit) + } + + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + trace!(function=?instance.name(), "transform"); + + let mut changed = false; + let mut new_body = MutableBody::from(body); + + // Inject a call to set-up memory initialization state if the function is a harness. 
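For orientation, a sketch of the user-facing attributes that end up matching `is_harness` below; the `kani` macros expand (roughly) to the `kanitool` attribute paths listed in `harness_identifiers`:

    // #[kani::proof]                     -> #[kanitool::proof]
    // fn check_foo() { /* ... */ }
    // #[kani::proof_for_contract(foo)]   -> #[kanitool::proof_for_contract(..)]
    // fn check_foo_contract() { /* ... */ }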
+ if is_harness(instance, tcx) { + inject_memory_init_setup(&mut new_body, tcx, &mut self.mem_init_fn_cache); + changed = true; + } + + // Call a helper that performs the actual instrumentation. + let mut instrumenter = UninitInstrumenter { + check_type: self.check_type.clone(), + mem_init_fn_cache: &mut self.mem_init_fn_cache, + }; + let (instrumentation_added, body) = + instrumenter.instrument(tcx, new_body, instance, CheckUninitVisitor::new()); + + (changed || instrumentation_added, body.into()) + } +} + +/// Checks if the instance is a harness -- an entry point of Kani analysis. +fn is_harness(instance: Instance, tcx: TyCtxt) -> bool { + let harness_identifiers = [ + vec![ + rustc_span::symbol::Symbol::intern("kanitool"), + rustc_span::symbol::Symbol::intern("proof_for_contract"), + ], + vec![ + rustc_span::symbol::Symbol::intern("kanitool"), + rustc_span::symbol::Symbol::intern("proof"), + ], + ]; + harness_identifiers.iter().any(|attr_path| { + tcx.has_attrs_with_path(rustc_internal::internal(tcx, instance.def.def_id()), attr_path) + }) +} + +/// Inject an initial call to set-up memory initialization tracking. +fn inject_memory_init_setup( + new_body: &mut MutableBody, + tcx: TyCtxt, + mem_init_fn_cache: &mut HashMap<&'static str, FnDef>, +) { + // First statement or terminator in the harness. + let mut source = if !new_body.blocks()[0].statements.is_empty() { + SourceInstruction::Statement { idx: 0, bb: 0 } + } else { + SourceInstruction::Terminator { bb: 0 } + }; + + // Dummy return place. + let ret_place = Place { + local: new_body.new_local( + Ty::new_tuple(&[]), + source.span(new_body.blocks()), + Mutability::Not, + ), + projection: vec![], + }; + + // Resolve the instance and inject a call to set-up the memory initialization state. + let memory_initialization_init = Instance::resolve( + get_mem_init_fn_def(tcx, "KaniInitializeMemoryInitializationState", mem_init_fn_cache), + &GenericArgs(vec![]), + ) + .unwrap(); + + new_body.insert_call( + &memory_initialization_init, + &mut source, + InsertPosition::Before, + vec![], + ret_place, + ); +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs similarity index 71% rename from kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs rename to kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs index 10a93b727a77..837e14abc886 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs @@ -3,135 +3,28 @@ // //! Visitor that collects all instructions relevant to uninitialized memory access. 
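As a quick illustration of what this visitor reports (a sketch; exact operands depend on the MIR lowering), given

    unsafe fn demo(p: *const u8) -> u8 {
        *p // a read through a raw pointer
    }

the visitor yields an `InitRelevantInstruction` whose `before_instruction` list contains `MemoryInitOp::Check { operand: p }`, which the instrumenter then lowers to a `KaniIsPtrInitialized` call guarding the read.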
-use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; -use stable_mir::mir::alloc::GlobalAlloc; -use stable_mir::mir::mono::{Instance, InstanceKind}; -use stable_mir::mir::visit::{Location, PlaceContext}; -use stable_mir::mir::{ - BasicBlockIdx, CastKind, LocalDecl, MirVisitor, Mutability, NonDivergingIntrinsic, Operand, - Place, PointerCoercion, ProjectionElem, Rvalue, Statement, StatementKind, Terminator, - TerminatorKind, +use crate::kani_middle::transform::{ + body::{InsertPosition, MutableBody, SourceInstruction}, + check_uninit::{ + relevant_instruction::{InitRelevantInstruction, MemoryInitOp}, + ty_layout::tys_layout_compatible_to_size, + TargetFinder, + }, +}; +use stable_mir::{ + mir::{ + alloc::GlobalAlloc, + mono::{Instance, InstanceKind}, + visit::{Location, PlaceContext}, + BasicBlockIdx, CastKind, LocalDecl, MirVisitor, Mutability, NonDivergingIntrinsic, Operand, + Place, PointerCoercion, ProjectionElem, Rvalue, Statement, StatementKind, Terminator, + TerminatorKind, + }, + ty::{ConstantKind, RigidTy, TyKind}, }; -use stable_mir::ty::{ConstantKind, RigidTy, Ty, TyKind}; -use strum_macros::AsRefStr; - -use super::{PointeeInfo, PointeeLayout}; - -/// Memory initialization operations: set or get memory initialization state for a given pointer. -#[derive(AsRefStr, Clone, Debug)] -pub enum MemoryInitOp { - /// Check memory initialization of data bytes in a memory region starting from the pointer - /// `operand` and of length `sizeof(operand)` bytes. - Check { operand: Operand }, - /// Set memory initialization state of data bytes in a memory region starting from the pointer - /// `operand` and of length `sizeof(operand)` bytes. - Set { operand: Operand, value: bool, position: InsertPosition }, - /// Check memory initialization of data bytes in a memory region starting from the pointer - /// `operand` and of length `count * sizeof(operand)` bytes. - CheckSliceChunk { operand: Operand, count: Operand }, - /// Set memory initialization state of data bytes in a memory region starting from the pointer - /// `operand` and of length `count * sizeof(operand)` bytes. - SetSliceChunk { operand: Operand, count: Operand, value: bool, position: InsertPosition }, - /// Set memory initialization of data bytes in a memory region starting from the reference to - /// `operand` and of length `sizeof(operand)` bytes. - SetRef { operand: Operand, value: bool, position: InsertPosition }, - /// Unsupported memory initialization operation. - Unsupported { reason: String }, - /// Operation that trivially accesses uninitialized memory, results in injecting `assert!(false)`. - TriviallyUnsafe { reason: String }, -} - -impl MemoryInitOp { - /// Produce an operand for the relevant memory initialization related operation. This is mostly - /// required so that the analysis can create a new local to take a reference in - /// `MemoryInitOp::SetRef`. - pub fn mk_operand(&self, body: &mut MutableBody, source: &mut SourceInstruction) -> Operand { - match self { - MemoryInitOp::Check { operand, .. } - | MemoryInitOp::Set { operand, .. } - | MemoryInitOp::CheckSliceChunk { operand, .. } - | MemoryInitOp::SetSliceChunk { operand, .. } => operand.clone(), - MemoryInitOp::SetRef { operand, .. 
} => Operand::Copy(Place { - local: { - let place = match operand { - Operand::Copy(place) | Operand::Move(place) => place, - Operand::Constant(_) => unreachable!(), - }; - body.insert_assignment( - Rvalue::AddressOf(Mutability::Not, place.clone()), - source, - self.position(), - ) - }, - projection: vec![], - }), - MemoryInitOp::Unsupported { .. } | MemoryInitOp::TriviallyUnsafe { .. } => { - unreachable!() - } - } - } - - pub fn expect_count(&self) -> Operand { - match self { - MemoryInitOp::CheckSliceChunk { count, .. } - | MemoryInitOp::SetSliceChunk { count, .. } => count.clone(), - MemoryInitOp::Check { .. } - | MemoryInitOp::Set { .. } - | MemoryInitOp::SetRef { .. } - | MemoryInitOp::Unsupported { .. } - | MemoryInitOp::TriviallyUnsafe { .. } => unreachable!(), - } - } - - pub fn expect_value(&self) -> bool { - match self { - MemoryInitOp::Set { value, .. } - | MemoryInitOp::SetSliceChunk { value, .. } - | MemoryInitOp::SetRef { value, .. } => *value, - MemoryInitOp::Check { .. } - | MemoryInitOp::CheckSliceChunk { .. } - | MemoryInitOp::Unsupported { .. } - | MemoryInitOp::TriviallyUnsafe { .. } => unreachable!(), - } - } - - pub fn position(&self) -> InsertPosition { - match self { - MemoryInitOp::Set { position, .. } - | MemoryInitOp::SetSliceChunk { position, .. } - | MemoryInitOp::SetRef { position, .. } => *position, - MemoryInitOp::Check { .. } - | MemoryInitOp::CheckSliceChunk { .. } - | MemoryInitOp::Unsupported { .. } - | MemoryInitOp::TriviallyUnsafe { .. } => InsertPosition::Before, - } - } -} - -/// Represents an instruction in the source code together with all memory initialization checks/sets -/// that are connected to the memory used in this instruction and whether they should be inserted -/// before or after the instruction. -#[derive(Clone, Debug)] -pub struct InitRelevantInstruction { - /// The instruction that affects the state of the memory. - pub source: SourceInstruction, - /// All memory-related operations that should happen after the instruction. - pub before_instruction: Vec, - /// All memory-related operations that should happen after the instruction. - pub after_instruction: Vec, -} - -impl InitRelevantInstruction { - pub fn push_operation(&mut self, source_op: MemoryInitOp) { - match source_op.position() { - InsertPosition::Before => self.before_instruction.push(source_op), - InsertPosition::After => self.after_instruction.push(source_op), - } - } -} -pub struct CheckUninitVisitor<'a> { - locals: &'a [LocalDecl], +pub struct CheckUninitVisitor { + locals: Vec, /// Whether we should skip the next instruction, since it might've been instrumented already. 
/// When we instrument an instruction, we partition the basic block, and the instruction that /// may trigger UB becomes the first instruction of the basic block, which we need to skip @@ -145,21 +38,32 @@ pub struct CheckUninitVisitor<'a> { bb: BasicBlockIdx, } -impl<'a> CheckUninitVisitor<'a> { - pub fn find_next( - body: &'a MutableBody, +impl TargetFinder for CheckUninitVisitor { + fn find_next( + &mut self, + body: &MutableBody, bb: BasicBlockIdx, skip_first: bool, ) -> Option<InitRelevantInstruction> { - let mut visitor = CheckUninitVisitor { - locals: body.locals(), - skip_next: skip_first, - current: SourceInstruction::Statement { idx: 0, bb }, + self.locals = body.locals().to_vec(); + self.skip_next = skip_first; + self.current = SourceInstruction::Statement { idx: 0, bb }; + self.target = None; + self.bb = bb; + self.visit_basic_block(&body.blocks()[bb]); + self.target.clone() + } +} + +impl CheckUninitVisitor { + pub fn new() -> Self { + Self { + locals: vec![], + skip_next: false, + current: SourceInstruction::Statement { idx: 0, bb: 0 }, target: None, - bb, - }; - visitor.visit_basic_block(&body.blocks()[bb]); - visitor.target + bb: 0, + } } fn push_target(&mut self, source_op: MemoryInitOp) { @@ -172,7 +76,7 @@ impl<'a> CheckUninitVisitor<'a> { } } -impl<'a> MirVisitor for CheckUninitVisitor<'a> { +impl MirVisitor for CheckUninitVisitor { fn visit_statement(&mut self, stmt: &Statement, location: Location) { if self.skip_next { self.skip_next = false; @@ -186,7 +90,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { operand: copy.src.clone(), count: copy.count.clone(), }); - // Destimation is a *mut T so it gets initialized. + // Destination is a *mut T so it gets initialized. self.push_target(MemoryInitOp::SetSliceChunk { operand: copy.dst.clone(), count: copy.count.clone(), @@ -208,7 +112,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { // if it points to initialized memory. if *projection_elem == ProjectionElem::Deref { if let TyKind::RigidTy(RigidTy::RawPtr(..)) = - place_to_add_projections.ty(&self.locals).unwrap().kind() + place_to_add_projections.ty(&&self.locals).unwrap().kind() { self.push_target(MemoryInitOp::Check { operand: Operand::Copy(place_to_add_projections.clone()), @@ -217,7 +121,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { } place_to_add_projections.projection.push(projection_elem.clone()); } - if place_without_deref.ty(&self.locals).unwrap().kind().is_raw_ptr() { + if place_without_deref.ty(&&self.locals).unwrap().kind().is_raw_ptr() { self.push_target(MemoryInitOp::Set { operand: Operand::Copy(place_without_deref), value: true, @@ -226,7 +130,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { } } // Check whether Rvalue creates a new initialized pointer previously not captured inside shadow memory. - if place.ty(&self.locals).unwrap().kind().is_raw_ptr() { + if place.ty(&&self.locals).unwrap().kind().is_raw_ptr() { if let Rvalue::AddressOf(..) = rvalue { self.push_target(MemoryInitOp::Set { operand: Operand::Copy(place.clone()), @@ -268,7 +172,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { match &term.kind { TerminatorKind::Call { func, args, destination, ..
} => { self.super_terminator(term, location); - let instance = match try_resolve_instance(self.locals, func) { + let instance = match try_resolve_instance(&self.locals, func) { Ok(instance) => instance, Err(reason) => { self.super_terminator(term, location); @@ -297,7 +201,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `{name}`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(..)) )); self.push_target(MemoryInitOp::Check { @@ -311,11 +215,11 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `compare_bytes`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) )); assert!(matches!( - args[1].ty(self.locals).unwrap().kind(), + args[1].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) )); self.push_target(MemoryInitOp::CheckSliceChunk { @@ -336,11 +240,11 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `copy`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) )); assert!(matches!( - args[1].ty(self.locals).unwrap().kind(), + args[1].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) )); self.push_target(MemoryInitOp::CheckSliceChunk { @@ -361,11 +265,11 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `typed_swap`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) )); assert!(matches!( - args[1].ty(self.locals).unwrap().kind(), + args[1].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) )); self.push_target(MemoryInitOp::Check { @@ -382,7 +286,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `volatile_load`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) )); self.push_target(MemoryInitOp::Check { @@ -396,7 +300,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `volatile_store`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) )); self.push_target(MemoryInitOp::Set { @@ -412,7 +316,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `write_bytes`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) )); self.push_target(MemoryInitOp::SetSliceChunk { @@ -463,13 +367,13 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { } TerminatorKind::Drop { place, .. } => { self.super_terminator(term, location); - let place_ty = place.ty(&self.locals).unwrap(); + let place_ty = place.ty(&&self.locals).unwrap(); // When drop is codegen'ed for types that could define their own dropping // behavior, a reference is taken to the place which is later implicitly coerced // to a pointer. Hence, we need to bless this pointer as initialized. 
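A sketch of the scenario this covers: for `drop(b)` with `b: Box<T>`, the drop glue takes `&mut *b` and coerces it to `*mut T`; without blessing that pointer here, the pointer use inside the drop shim would be flagged spuriously.

    // let r: &mut T = &mut *b;               // implicit in the drop glue
    // core::ptr::drop_in_place(r as *mut T); // must see `r` as initialized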
match place - .ty(&self.locals) + .ty(&&self.locals) .unwrap() .kind() .rigid() @@ -511,7 +415,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { Place { local: place.local, projection: place.projection[..idx].to_vec() }; match elem { ProjectionElem::Deref => { - let ptr_ty = intermediate_place.ty(self.locals).unwrap(); + let ptr_ty = intermediate_place.ty(&self.locals).unwrap(); if ptr_ty.kind().is_raw_ptr() { self.push_target(MemoryInitOp::Check { operand: Operand::Copy(intermediate_place.clone()), @@ -572,43 +476,38 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { } } } - CastKind::PtrToPtr => { - let operand_ty = operand.ty(&self.locals).unwrap(); - if let ( - RigidTy::RawPtr(from_ty, Mutability::Mut), - RigidTy::RawPtr(to_ty, Mutability::Mut), - ) = (operand_ty.kind().rigid().unwrap(), ty.kind().rigid().unwrap()) - { - if !tys_layout_compatible(from_ty, to_ty) { - // If casting from a mutable pointer to a mutable pointer with - // different layouts, delayed UB could occur. - self.push_target(MemoryInitOp::Unsupported { - reason: "Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB.".to_string(), - }); - } - } - } CastKind::Transmute => { let operand_ty = operand.ty(&self.locals).unwrap(); - if let ( - RigidTy::RawPtr(from_ty, Mutability::Mut), - RigidTy::RawPtr(to_ty, Mutability::Mut), - ) = (operand_ty.kind().rigid().unwrap(), ty.kind().rigid().unwrap()) - { - if !tys_layout_compatible(from_ty, to_ty) { - // If casting from a mutable pointer to a mutable pointer with different - // layouts, delayed UB could occur. - self.push_target(MemoryInitOp::Unsupported { - reason: "Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB.".to_string(), - }); - } - } else if !tys_layout_compatible(&operand_ty, &ty) { + if !tys_layout_compatible_to_size(&operand_ty, &ty) { // If transmuting between two types of incompatible layouts, padding // bytes are exposed, which is UB. self.push_target(MemoryInitOp::TriviallyUnsafe { reason: "Transmuting between types of incompatible layouts." .to_string(), }); + } else if let ( + TyKind::RigidTy(RigidTy::Ref(_, from_ty, _)), + TyKind::RigidTy(RigidTy::Ref(_, to_ty, _)), + ) = (operand_ty.kind(), ty.kind()) + { + if !tys_layout_compatible_to_size(&from_ty, &to_ty) { + // Since references are always supposed to be initialized for their type, + // transmuting between two references of incompatible layout is UB. + self.push_target(MemoryInitOp::TriviallyUnsafe { + reason: "Transmuting between references pointing to types of incompatible layouts." + .to_string(), + }); + } + } else if let ( + TyKind::RigidTy(RigidTy::RawPtr(from_ty, _)), + TyKind::RigidTy(RigidTy::Ref(_, to_ty, _)), + ) = (operand_ty.kind(), ty.kind()) + { + // Assert that we can only cast this way if the types are the same. + assert!(from_ty == to_ty); + // When transmuting from a raw pointer to a reference, we need to check that + // the value pointed to by the raw pointer is initialized. + self.push_target(MemoryInitOp::Check { operand: operand.clone() }); } } _ => {}
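The three `Transmute` cases above correspond to source patterns like these (a sketch; assumes a target where `(u8, u32)` contains padding bytes):

    // transmute::<(u8, u32), u64>(..)    -- a padding byte becomes data: trivially unsafe
    // transmute::<&(u8, u32), &u64>(..)  -- references to incompatibly laid-out types: trivially unsafe
    // transmute::<*const u8, &u8>(p)     -- same pointee type: Check { operand: p } is emitted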
@@ -769,40 +668,7 @@ fn try_resolve_instance(locals: &[LocalDecl], func: &Operand) -> Result<Instance, String> { let ty = func.ty(locals).unwrap(); match ty.kind() { TyKind::RigidTy(RigidTy::FnDef(def, args)) => Ok(Instance::resolve(def, &args).unwrap()), _ => Err(format!( - "Kani does not support reasoning about memory initialization of arguments to `{ty:?}`." + "Kani was not able to resolve the instance of the function operand `{ty:?}`. Currently, memory initialization checks in the presence of function pointers and vtable calls are not supported. For more information about planned support, see https://github.com/model-checking/kani/issues/3300." )), } } - -/// Returns true if `to_ty` has a smaller or equal size and the same padding bytes as `from_ty` up until -/// its size. -fn tys_layout_compatible(from_ty: &Ty, to_ty: &Ty) -> bool { - // Retrieve layouts to assess compatibility. - let from_ty_info = PointeeInfo::from_ty(*from_ty); - let to_ty_info = PointeeInfo::from_ty(*to_ty); - if let (Ok(from_ty_info), Ok(to_ty_info)) = (from_ty_info, to_ty_info) { - let from_ty_layout = match from_ty_info.layout() { - PointeeLayout::Sized { layout } => layout, - PointeeLayout::Slice { element_layout } => element_layout, - PointeeLayout::TraitObject => return false, - }; - let to_ty_layout = match to_ty_info.layout() { - PointeeLayout::Sized { layout } => layout, - PointeeLayout::Slice { element_layout } => element_layout, - PointeeLayout::TraitObject => return false, - }; - // Ensure `to_ty_layout` does not have a larger size. - if to_ty_layout.len() <= from_ty_layout.len() { - // Check data and padding bytes pair-wise. - if from_ty_layout.iter().zip(to_ty_layout.iter()).all( - |(from_ty_layout_byte, to_ty_layout_byte)| { - // Make sure all data and padding bytes match. - from_ty_layout_byte == to_ty_layout_byte - }, - ) { - return true; - } - } - }; - false -} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/relevant_instruction.rs b/kani-compiler/src/kani_middle/transform/check_uninit/relevant_instruction.rs new file mode 100644 index 000000000000..3bc5b534a23b --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/relevant_instruction.rs @@ -0,0 +1,130 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! Module containing data structures used in identifying places that need instrumentation and the +//! kind of instrumentation needed. + +use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; +use stable_mir::mir::{Mutability, Operand, Place, Rvalue}; +use strum_macros::AsRefStr; + +/// Memory initialization operations: set or get memory initialization state for a given pointer. +#[derive(AsRefStr, Clone, Debug)] +pub enum MemoryInitOp { + /// Check memory initialization of data bytes in a memory region starting from the pointer + /// `operand` and of length `sizeof(operand)` bytes. + Check { operand: Operand }, + /// Set memory initialization state of data bytes in a memory region starting from the pointer + /// `operand` and of length `sizeof(operand)` bytes. + Set { operand: Operand, value: bool, position: InsertPosition }, + /// Check memory initialization of data bytes in a memory region starting from the pointer + /// `operand` and of length `count * sizeof(operand)` bytes. + CheckSliceChunk { operand: Operand, count: Operand }, + /// Set memory initialization state of data bytes in a memory region starting from the pointer + /// `operand` and of length `count * sizeof(operand)` bytes. + SetSliceChunk { operand: Operand, count: Operand, value: bool, position: InsertPosition }, + /// Check memory initialization of data bytes in a memory region starting from the reference to + /// `operand` and of length `sizeof(operand)` bytes. + CheckRef { operand: Operand }, + /// Set memory initialization of data bytes in a memory region starting from the reference to + /// `operand` and of length `sizeof(operand)` bytes. + SetRef { operand: Operand, value: bool, position: InsertPosition }, + /// Unsupported memory initialization operation. + Unsupported { reason: String }, + /// Operation that trivially accesses uninitialized memory; results in injecting `assert!(false)`. + TriviallyUnsafe { reason: String }, +}
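A rough mapping from source-level operations to the variants above (a sketch; operands shown in surface syntax rather than MIR):

    // let v = *p;                       -> Check { operand: p }
    // *p = v;                           -> Set { operand: p, value: true, .. }
    // copy_nonoverlapping(src, dst, n)  -> CheckSliceChunk { operand: src, count: n }
    //                                      + SetSliceChunk { operand: dst, count: n, value: true, .. }
    // drop glue taking &mut place       -> SetRef { operand: place, value: true, .. }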
+ +impl MemoryInitOp { + /// Produce an operand for the relevant memory-initialization-related operation. This is mostly + /// required so that the analysis can create a new local to take a reference in + /// `MemoryInitOp::CheckRef`/`MemoryInitOp::SetRef`. + pub fn mk_operand(&self, body: &mut MutableBody, source: &mut SourceInstruction) -> Operand { + match self { + MemoryInitOp::Check { operand, .. } + | MemoryInitOp::Set { operand, .. } + | MemoryInitOp::CheckSliceChunk { operand, .. } + | MemoryInitOp::SetSliceChunk { operand, .. } => operand.clone(), + MemoryInitOp::CheckRef { operand, .. } | MemoryInitOp::SetRef { operand, .. } => { + Operand::Copy(Place { + local: { + let place = match operand { + Operand::Copy(place) | Operand::Move(place) => place, + Operand::Constant(_) => unreachable!(), + }; + body.insert_assignment( + Rvalue::AddressOf(Mutability::Not, place.clone()), + source, + self.position(), + ) + }, + projection: vec![], + }) + } + MemoryInitOp::Unsupported { .. } | MemoryInitOp::TriviallyUnsafe { .. } => { + unreachable!() + } + } + } + + pub fn expect_count(&self) -> Operand { + match self { + MemoryInitOp::CheckSliceChunk { count, .. } + | MemoryInitOp::SetSliceChunk { count, .. } => count.clone(), + MemoryInitOp::Check { .. } + | MemoryInitOp::Set { .. } + | MemoryInitOp::CheckRef { .. } + | MemoryInitOp::SetRef { .. } + | MemoryInitOp::Unsupported { .. } + | MemoryInitOp::TriviallyUnsafe { .. } => unreachable!(), + } + } + + pub fn expect_value(&self) -> bool { + match self { + MemoryInitOp::Set { value, .. } + | MemoryInitOp::SetSliceChunk { value, .. } + | MemoryInitOp::SetRef { value, .. } => *value, + MemoryInitOp::Check { .. } + | MemoryInitOp::CheckSliceChunk { .. } + | MemoryInitOp::CheckRef { .. } + | MemoryInitOp::Unsupported { .. } + | MemoryInitOp::TriviallyUnsafe { .. } => unreachable!(), + } + } + + pub fn position(&self) -> InsertPosition { + match self { + MemoryInitOp::Set { position, .. } + | MemoryInitOp::SetSliceChunk { position, .. } + | MemoryInitOp::SetRef { position, .. } => *position, + MemoryInitOp::Check { .. } + | MemoryInitOp::CheckSliceChunk { .. } + | MemoryInitOp::CheckRef { .. } + | MemoryInitOp::Unsupported { .. } + | MemoryInitOp::TriviallyUnsafe { .. } => InsertPosition::Before, + } + } +}
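Why `mk_operand` may synthesize a new local, sketched in surface syntax: for the `CheckRef`/`SetRef` variants the injected runtime hooks expect a pointer, so the instrumentation first takes the address of the place (the hook name below is hypothetical):

    // given SetRef { operand: x, .. }, the pass effectively emits:
    // let tmp = std::ptr::addr_of!(x); // Rvalue::AddressOf(Mutability::Not, x)
    // kani_set_ptr_initialized(tmp, layout, true); // hypothetical hook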
+ +/// Represents an instruction in the source code together with all memory initialization checks/sets +/// that are connected to the memory used in this instruction and whether they should be inserted +/// before or after the instruction. +#[derive(Clone, Debug)] +pub struct InitRelevantInstruction { + /// The instruction that affects the state of the memory. + pub source: SourceInstruction, + /// All memory-related operations that should happen before the instruction. + pub before_instruction: Vec<MemoryInitOp>, + /// All memory-related operations that should happen after the instruction. + pub after_instruction: Vec<MemoryInitOp>, +} + +impl InitRelevantInstruction { + pub fn push_operation(&mut self, source_op: MemoryInitOp) { + match source_op.position() { + InsertPosition::Before => self.before_instruction.push(source_op), + InsertPosition::After => self.after_instruction.push(source_op), + } + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs index 09116230af80..8a162d5944d3 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs @@ -3,10 +3,12 @@ // //! Utility functions that help calculate type layout. -use stable_mir::abi::{FieldsShape, Scalar, TagEncoding, ValueAbi, VariantsShape}; -use stable_mir::target::{MachineInfo, MachineSize}; -use stable_mir::ty::{AdtKind, IndexedVal, RigidTy, Ty, TyKind, UintTy}; -use stable_mir::CrateDef; +use stable_mir::{ + abi::{FieldsShape, Scalar, TagEncoding, ValueAbi, VariantsShape}, + target::{MachineInfo, MachineSize}, + ty::{AdtKind, IndexedVal, RigidTy, Ty, TyKind, UintTy}, + CrateDef, +}; /// Represents a chunk of data bytes in a data structure. #[derive(Clone, Debug, Eq, PartialEq, Hash)] @@ -332,3 +334,48 @@ fn data_bytes_for_ty( FieldsShape::Array { .. } => Ok(vec![]), } } + +/// Returns true if `to_ty` has a smaller or equal size and padding bytes in `from_ty` are padding +/// bytes in `to_ty`. +pub fn tys_layout_compatible_to_size(from_ty: &Ty, to_ty: &Ty) -> bool { + tys_layout_cmp_to_size(from_ty, to_ty, |from_byte, to_byte| from_byte || !to_byte) +} + +/// Returns true if `to_ty` has a smaller or equal size and the per-byte layouts (data vs. padding) +/// of `from_ty` and `to_ty` match exactly up to that size. +pub fn tys_layout_equal_to_size(from_ty: &Ty, to_ty: &Ty) -> bool { + tys_layout_cmp_to_size(from_ty, to_ty, |from_byte, to_byte| from_byte == to_byte) +} + +/// Returns true if `to_ty` has a smaller or equal size and the comparator function returns true for +/// all per-byte initialization value pairs up to that size. +fn tys_layout_cmp_to_size(from_ty: &Ty, to_ty: &Ty, cmp: impl Fn(bool, bool) -> bool) -> bool { + // Retrieve layouts to assess compatibility. + let from_ty_info = PointeeInfo::from_ty(*from_ty); + let to_ty_info = PointeeInfo::from_ty(*to_ty); + if let (Ok(from_ty_info), Ok(to_ty_info)) = (from_ty_info, to_ty_info) { + let from_ty_layout = match from_ty_info.layout() { + PointeeLayout::Sized { layout } => layout, + PointeeLayout::Slice { element_layout } => element_layout, + PointeeLayout::TraitObject => return false, + }; + let to_ty_layout = match to_ty_info.layout() { + PointeeLayout::Sized { layout } => layout, + PointeeLayout::Slice { element_layout } => element_layout, + PointeeLayout::TraitObject => return false, + }; + // Ensure `to_ty_layout` does not have a larger size. + if to_ty_layout.len() <= from_ty_layout.len() { + // Check data and padding bytes pair-wise. + if from_ty_layout.iter().zip(to_ty_layout.iter()).all( + |(from_ty_layout_byte, to_ty_layout_byte)| { + // Run comparator on each pair. + cmp(*from_ty_layout_byte, *to_ty_layout_byte) + }, + ) { + return true; + } + } + }; + false +}
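For intuition, with `true` marking a data byte and `false` a padding byte, the compatible-to-size comparator `from_byte || !to_byte` rejects exactly the case where a padding byte of `from_ty` would be read as data in `to_ty`. A worked sketch (byte layout assumes a `#[repr(C)]` struct, so byte 1 is padding):

    fn layout_compat_demo() {
        // #[repr(C)] struct Pair { a: u8, b: u16 } has per-byte layout [data, pad, data, data];
        // `true` marks a data byte, `false` a padding byte.
        let from = [true, false, true, true]; // Pair
        let to = [true, true]; // u16, read at the same address
        let compatible =
            to.len() <= from.len() && from.iter().zip(to.iter()).all(|(&f, &t)| f || !t);
        // Byte 1 is padding in `from` but data in `to`, so the cast is rejected.
        assert!(!compatible);
    }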
diff --git a/kani-compiler/src/kani_middle/transform/internal_mir.rs b/kani-compiler/src/kani_middle/transform/internal_mir.rs new file mode 100644 index 000000000000..0dcf7d47c13a --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/internal_mir.rs @@ -0,0 +1,656 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! This file contains conversions from stable MIR data structures to their internal +//! counterparts. This is primarily done to facilitate using dataflow analysis, which does not yet +//! support StableMIR. We tried to contribute this back to StableMIR, but faced some pushback since +//! other maintainers wanted to keep the conversions minimal. For more information, see +//! https://github.com/rust-lang/rust/pull/127782 + +use rustc_middle::ty::{self as rustc_ty, TyCtxt}; +use rustc_smir::rustc_internal::internal; +use stable_mir::mir::{ + AggregateKind, AssertMessage, Body, BorrowKind, CastKind, ConstOperand, CopyNonOverlapping, + CoroutineDesugaring, CoroutineKind, CoroutineSource, FakeBorrowKind, FakeReadCause, LocalDecl, + MutBorrowKind, NonDivergingIntrinsic, NullOp, Operand, PointerCoercion, RetagKind, Rvalue, + Statement, StatementKind, SwitchTargets, Terminator, TerminatorKind, UnwindAction, + UserTypeProjection, Variance, +}; + +pub trait RustcInternalMir { + type T<'tcx>; + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx>; +} + +impl RustcInternalMir for AggregateKind { + type T<'tcx> = rustc_middle::mir::AggregateKind<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + AggregateKind::Array(ty) => rustc_middle::mir::AggregateKind::Array(internal(tcx, ty)), + AggregateKind::Tuple => rustc_middle::mir::AggregateKind::Tuple, + AggregateKind::Adt( + adt_def, + variant_idx, + generic_args, + maybe_user_type_annotation_index, + maybe_field_idx, + ) => rustc_middle::mir::AggregateKind::Adt( + internal(tcx, adt_def.0), + internal(tcx, variant_idx), + internal(tcx, generic_args), + maybe_user_type_annotation_index + .map(rustc_middle::ty::UserTypeAnnotationIndex::from_usize), + maybe_field_idx.map(rustc_target::abi::FieldIdx::from_usize), + ), + AggregateKind::Closure(closure_def, generic_args) => { + rustc_middle::mir::AggregateKind::Closure( + internal(tcx, closure_def.0), + internal(tcx, generic_args), + ) + } + AggregateKind::Coroutine(coroutine_def, generic_args, _) => { + rustc_middle::mir::AggregateKind::Coroutine( + internal(tcx, coroutine_def.0), + internal(tcx, generic_args), + ) + } + AggregateKind::RawPtr(ty, mutability) => rustc_middle::mir::AggregateKind::RawPtr( + internal(tcx, ty), + internal(tcx, mutability), + ), + } + } +} + +impl RustcInternalMir for ConstOperand { + type T<'tcx> = rustc_middle::mir::ConstOperand<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::ConstOperand { + span: internal(tcx, self.span), + user_ty: self.user_ty.map(rustc_ty::UserTypeAnnotationIndex::from_usize), + const_: internal(tcx, self.const_.clone()), + } + } +} + +impl RustcInternalMir for Operand { + type T<'tcx> = rustc_middle::mir::Operand<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + Operand::Copy(place) => rustc_middle::mir::Operand::Copy(internal(tcx, place)), + Operand::Move(place) => rustc_middle::mir::Operand::Move(internal(tcx, place)), + Operand::Constant(const_operand) => { + rustc_middle::mir::Operand::Constant(Box::new(const_operand.internal_mir(tcx))) + } + } + } +} + +impl RustcInternalMir for PointerCoercion { + type T<'tcx> = rustc_middle::ty::adjustment::PointerCoercion; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + PointerCoercion::ReifyFnPointer => { + rustc_middle::ty::adjustment::PointerCoercion::ReifyFnPointer + } + PointerCoercion::UnsafeFnPointer => {
rustc_middle::ty::adjustment::PointerCoercion::UnsafeFnPointer + } + PointerCoercion::ClosureFnPointer(safety) => { + rustc_middle::ty::adjustment::PointerCoercion::ClosureFnPointer(internal( + tcx, safety, + )) + } + PointerCoercion::MutToConstPointer => { + rustc_middle::ty::adjustment::PointerCoercion::MutToConstPointer + } + PointerCoercion::ArrayToPointer => { + rustc_middle::ty::adjustment::PointerCoercion::ArrayToPointer + } + PointerCoercion::Unsize => rustc_middle::ty::adjustment::PointerCoercion::Unsize, + } + } +} + +impl RustcInternalMir for CastKind { + type T<'tcx> = rustc_middle::mir::CastKind; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + CastKind::PointerExposeAddress => rustc_middle::mir::CastKind::PointerExposeProvenance, + CastKind::PointerWithExposedProvenance => { + rustc_middle::mir::CastKind::PointerWithExposedProvenance + } + CastKind::PointerCoercion(ptr_coercion) => { + rustc_middle::mir::CastKind::PointerCoercion(ptr_coercion.internal_mir(tcx)) + } + CastKind::DynStar => rustc_middle::mir::CastKind::DynStar, + CastKind::IntToInt => rustc_middle::mir::CastKind::IntToInt, + CastKind::FloatToInt => rustc_middle::mir::CastKind::FloatToInt, + CastKind::FloatToFloat => rustc_middle::mir::CastKind::FloatToFloat, + CastKind::IntToFloat => rustc_middle::mir::CastKind::IntToFloat, + CastKind::PtrToPtr => rustc_middle::mir::CastKind::PtrToPtr, + CastKind::FnPtrToPtr => rustc_middle::mir::CastKind::FnPtrToPtr, + CastKind::Transmute => rustc_middle::mir::CastKind::Transmute, + } + } +} + +impl RustcInternalMir for FakeBorrowKind { + type T<'tcx> = rustc_middle::mir::FakeBorrowKind; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + FakeBorrowKind::Deep => rustc_middle::mir::FakeBorrowKind::Deep, + FakeBorrowKind::Shallow => rustc_middle::mir::FakeBorrowKind::Shallow, + } + } +} + +impl RustcInternalMir for MutBorrowKind { + type T<'tcx> = rustc_middle::mir::MutBorrowKind; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + MutBorrowKind::Default => rustc_middle::mir::MutBorrowKind::Default, + MutBorrowKind::TwoPhaseBorrow => rustc_middle::mir::MutBorrowKind::TwoPhaseBorrow, + MutBorrowKind::ClosureCapture => rustc_middle::mir::MutBorrowKind::ClosureCapture, + } + } +} + +impl RustcInternalMir for BorrowKind { + type T<'tcx> = rustc_middle::mir::BorrowKind; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + BorrowKind::Shared => rustc_middle::mir::BorrowKind::Shared, + BorrowKind::Fake(fake_borrow_kind) => { + rustc_middle::mir::BorrowKind::Fake(fake_borrow_kind.internal_mir(tcx)) + } + BorrowKind::Mut { kind } => { + rustc_middle::mir::BorrowKind::Mut { kind: kind.internal_mir(tcx) } + } + } + } +} + +impl RustcInternalMir for NullOp { + type T<'tcx> = rustc_middle::mir::NullOp<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + NullOp::SizeOf => rustc_middle::mir::NullOp::SizeOf, + NullOp::AlignOf => rustc_middle::mir::NullOp::AlignOf, + NullOp::OffsetOf(offsets) => rustc_middle::mir::NullOp::OffsetOf( + tcx.mk_offset_of( + offsets + .iter() + .map(|(variant_idx, field_idx)| { + ( + internal(tcx, variant_idx), + rustc_target::abi::FieldIdx::from_usize(*field_idx), + ) + }) + .collect::>() + .as_slice(), + ), + ), + NullOp::UbChecks => rustc_middle::mir::NullOp::UbChecks, + } + } +} + +impl RustcInternalMir for Rvalue { + type T<'tcx> = rustc_middle::mir::Rvalue<'tcx>; + + fn 
internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + Rvalue::AddressOf(mutability, place) => rustc_middle::mir::Rvalue::AddressOf( + internal(tcx, mutability), + internal(tcx, place), + ), + Rvalue::Aggregate(aggregate_kind, operands) => rustc_middle::mir::Rvalue::Aggregate( + Box::new(aggregate_kind.internal_mir(tcx)), + rustc_index::IndexVec::from_raw( + operands.iter().map(|operand| operand.internal_mir(tcx)).collect(), + ), + ), + Rvalue::BinaryOp(bin_op, left_operand, right_operand) + | Rvalue::CheckedBinaryOp(bin_op, left_operand, right_operand) => { + rustc_middle::mir::Rvalue::BinaryOp( + internal(tcx, bin_op), + Box::new((left_operand.internal_mir(tcx), right_operand.internal_mir(tcx))), + ) + } + Rvalue::Cast(cast_kind, operand, ty) => rustc_middle::mir::Rvalue::Cast( + cast_kind.internal_mir(tcx), + operand.internal_mir(tcx), + internal(tcx, ty), + ), + Rvalue::CopyForDeref(place) => { + rustc_middle::mir::Rvalue::CopyForDeref(internal(tcx, place)) + } + Rvalue::Discriminant(place) => { + rustc_middle::mir::Rvalue::Discriminant(internal(tcx, place)) + } + Rvalue::Len(place) => rustc_middle::mir::Rvalue::Len(internal(tcx, place)), + Rvalue::Ref(region, borrow_kind, place) => rustc_middle::mir::Rvalue::Ref( + internal(tcx, region), + borrow_kind.internal_mir(tcx), + internal(tcx, place), + ), + Rvalue::Repeat(operand, ty_const) => rustc_middle::mir::Rvalue::Repeat( + operand.internal_mir(tcx), + internal(tcx, ty_const), + ), + Rvalue::ShallowInitBox(operand, ty) => rustc_middle::mir::Rvalue::ShallowInitBox( + operand.internal_mir(tcx), + internal(tcx, ty), + ), + Rvalue::ThreadLocalRef(crate_item) => { + rustc_middle::mir::Rvalue::ThreadLocalRef(internal(tcx, crate_item.0)) + } + Rvalue::NullaryOp(null_op, ty) => { + rustc_middle::mir::Rvalue::NullaryOp(null_op.internal_mir(tcx), internal(tcx, ty)) + } + Rvalue::UnaryOp(un_op, operand) => { + rustc_middle::mir::Rvalue::UnaryOp(internal(tcx, un_op), operand.internal_mir(tcx)) + } + Rvalue::Use(operand) => rustc_middle::mir::Rvalue::Use(operand.internal_mir(tcx)), + } + } +} + +impl RustcInternalMir for FakeReadCause { + type T<'tcx> = rustc_middle::mir::FakeReadCause; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + FakeReadCause::ForMatchGuard => rustc_middle::mir::FakeReadCause::ForMatchGuard, + FakeReadCause::ForMatchedPlace(_opaque) => { + unimplemented!("cannot convert back from an opaque field") + } + FakeReadCause::ForGuardBinding => rustc_middle::mir::FakeReadCause::ForGuardBinding, + FakeReadCause::ForLet(_opaque) => { + unimplemented!("cannot convert back from an opaque field") + } + FakeReadCause::ForIndex => rustc_middle::mir::FakeReadCause::ForIndex, + } + } +} + +impl RustcInternalMir for RetagKind { + type T<'tcx> = rustc_middle::mir::RetagKind; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + RetagKind::FnEntry => rustc_middle::mir::RetagKind::FnEntry, + RetagKind::TwoPhase => rustc_middle::mir::RetagKind::TwoPhase, + RetagKind::Raw => rustc_middle::mir::RetagKind::Raw, + RetagKind::Default => rustc_middle::mir::RetagKind::Default, + } + } +} + +impl RustcInternalMir for UserTypeProjection { + type T<'tcx> = rustc_middle::mir::UserTypeProjection; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + unimplemented!("cannot convert back from an opaque field") + } +} + +impl RustcInternalMir for Variance { + type T<'tcx> = rustc_middle::ty::Variance; + + fn internal_mir<'tcx>(&self, _tcx: 
TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + Variance::Covariant => rustc_middle::ty::Variance::Covariant, + Variance::Invariant => rustc_middle::ty::Variance::Invariant, + Variance::Contravariant => rustc_middle::ty::Variance::Contravariant, + Variance::Bivariant => rustc_middle::ty::Variance::Bivariant, + } + } +} + +impl RustcInternalMir for CopyNonOverlapping { + type T<'tcx> = rustc_middle::mir::CopyNonOverlapping<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::CopyNonOverlapping { + src: self.src.internal_mir(tcx), + dst: self.dst.internal_mir(tcx), + count: self.count.internal_mir(tcx), + } + } +} + +impl RustcInternalMir for NonDivergingIntrinsic { + type T<'tcx> = rustc_middle::mir::NonDivergingIntrinsic<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + NonDivergingIntrinsic::Assume(operand) => { + rustc_middle::mir::NonDivergingIntrinsic::Assume(operand.internal_mir(tcx)) + } + NonDivergingIntrinsic::CopyNonOverlapping(copy_non_overlapping) => { + rustc_middle::mir::NonDivergingIntrinsic::CopyNonOverlapping( + copy_non_overlapping.internal_mir(tcx), + ) + } + } + } +} + +impl RustcInternalMir for StatementKind { + type T<'tcx> = rustc_middle::mir::StatementKind<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + StatementKind::Assign(place, rvalue) => rustc_middle::mir::StatementKind::Assign( + Box::new((internal(tcx, place), rvalue.internal_mir(tcx))), + ), + StatementKind::FakeRead(fake_read_cause, place) => { + rustc_middle::mir::StatementKind::FakeRead(Box::new(( + fake_read_cause.internal_mir(tcx), + internal(tcx, place), + ))) + } + StatementKind::SetDiscriminant { place, variant_index } => { + rustc_middle::mir::StatementKind::SetDiscriminant { + place: internal(tcx, place).into(), + variant_index: internal(tcx, variant_index), + } + } + StatementKind::Deinit(place) => { + rustc_middle::mir::StatementKind::Deinit(internal(tcx, place).into()) + } + StatementKind::StorageLive(local) => rustc_middle::mir::StatementKind::StorageLive( + rustc_middle::mir::Local::from_usize(*local), + ), + StatementKind::StorageDead(local) => rustc_middle::mir::StatementKind::StorageDead( + rustc_middle::mir::Local::from_usize(*local), + ), + StatementKind::Retag(retag_kind, place) => rustc_middle::mir::StatementKind::Retag( + retag_kind.internal_mir(tcx), + internal(tcx, place).into(), + ), + StatementKind::PlaceMention(place) => { + rustc_middle::mir::StatementKind::PlaceMention(Box::new(internal(tcx, place))) + } + StatementKind::AscribeUserType { place, projections, variance } => { + rustc_middle::mir::StatementKind::AscribeUserType( + Box::new((internal(tcx, place), projections.internal_mir(tcx))), + variance.internal_mir(tcx), + ) + } + StatementKind::Coverage(_coverage_kind) => { + unimplemented!("cannot convert back from an opaque field") + } + StatementKind::Intrinsic(non_diverging_intrinsic) => { + rustc_middle::mir::StatementKind::Intrinsic( + non_diverging_intrinsic.internal_mir(tcx).into(), + ) + } + StatementKind::ConstEvalCounter => rustc_middle::mir::StatementKind::ConstEvalCounter, + StatementKind::Nop => rustc_middle::mir::StatementKind::Nop, + } + } +} + +impl RustcInternalMir for Statement { + type T<'tcx> = rustc_middle::mir::Statement<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::Statement { + source_info: rustc_middle::mir::SourceInfo::outermost(internal(tcx, self.span)), + kind: 
self.kind.internal_mir(tcx), + } + } +} + +impl RustcInternalMir for UnwindAction { + type T<'tcx> = rustc_middle::mir::UnwindAction; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + UnwindAction::Continue => rustc_middle::mir::UnwindAction::Continue, + UnwindAction::Unreachable => rustc_middle::mir::UnwindAction::Unreachable, + UnwindAction::Terminate => rustc_middle::mir::UnwindAction::Terminate( + rustc_middle::mir::UnwindTerminateReason::Abi, + ), + UnwindAction::Cleanup(basic_block_idx) => rustc_middle::mir::UnwindAction::Cleanup( + rustc_middle::mir::BasicBlock::from_usize(*basic_block_idx), + ), + } + } +} + +impl RustcInternalMir for SwitchTargets { + type T<'tcx> = rustc_middle::mir::SwitchTargets; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::SwitchTargets::new( + self.branches().map(|(value, basic_block_idx)| { + (value, rustc_middle::mir::BasicBlock::from_usize(basic_block_idx)) + }), + rustc_middle::mir::BasicBlock::from_usize(self.otherwise()), + ) + } +} + +impl RustcInternalMir for CoroutineDesugaring { + type T<'tcx> = rustc_hir::CoroutineDesugaring; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + CoroutineDesugaring::Async => rustc_hir::CoroutineDesugaring::Async, + CoroutineDesugaring::Gen => rustc_hir::CoroutineDesugaring::Gen, + CoroutineDesugaring::AsyncGen => rustc_hir::CoroutineDesugaring::AsyncGen, + } + } +} + +impl RustcInternalMir for CoroutineSource { + type T<'tcx> = rustc_hir::CoroutineSource; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + CoroutineSource::Block => rustc_hir::CoroutineSource::Block, + CoroutineSource::Closure => rustc_hir::CoroutineSource::Closure, + CoroutineSource::Fn => rustc_hir::CoroutineSource::Fn, + } + } +} + +impl RustcInternalMir for CoroutineKind { + type T<'tcx> = rustc_hir::CoroutineKind; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + CoroutineKind::Desugared(coroutine_desugaring, coroutine_source) => { + rustc_hir::CoroutineKind::Desugared( + coroutine_desugaring.internal_mir(tcx), + coroutine_source.internal_mir(tcx), + ) + } + CoroutineKind::Coroutine(movability) => { + rustc_hir::CoroutineKind::Coroutine(internal(tcx, movability)) + } + } + } +} + +impl RustcInternalMir for AssertMessage { + type T<'tcx> = rustc_middle::mir::AssertMessage<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + AssertMessage::BoundsCheck { len, index } => { + rustc_middle::mir::AssertMessage::BoundsCheck { + len: len.internal_mir(tcx), + index: index.internal_mir(tcx), + } + } + AssertMessage::Overflow(bin_op, left_operand, right_operand) => { + rustc_middle::mir::AssertMessage::Overflow( + internal(tcx, bin_op), + left_operand.internal_mir(tcx), + right_operand.internal_mir(tcx), + ) + } + AssertMessage::OverflowNeg(operand) => { + rustc_middle::mir::AssertMessage::OverflowNeg(operand.internal_mir(tcx)) + } + AssertMessage::DivisionByZero(operand) => { + rustc_middle::mir::AssertMessage::DivisionByZero(operand.internal_mir(tcx)) + } + AssertMessage::RemainderByZero(operand) => { + rustc_middle::mir::AssertMessage::RemainderByZero(operand.internal_mir(tcx)) + } + AssertMessage::ResumedAfterReturn(coroutine_kind) => { + rustc_middle::mir::AssertMessage::ResumedAfterReturn( + coroutine_kind.internal_mir(tcx), + ) + } + AssertMessage::ResumedAfterPanic(coroutine_kind) => { + 
rustc_middle::mir::AssertMessage::ResumedAfterPanic( + coroutine_kind.internal_mir(tcx), + ) + } + AssertMessage::MisalignedPointerDereference { required, found } => { + rustc_middle::mir::AssertMessage::MisalignedPointerDereference { + required: required.internal_mir(tcx), + found: found.internal_mir(tcx), + } + } + } + } +} + +impl RustcInternalMir for TerminatorKind { + type T<'tcx> = rustc_middle::mir::TerminatorKind<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + TerminatorKind::Goto { target } => rustc_middle::mir::TerminatorKind::Goto { + target: rustc_middle::mir::BasicBlock::from_usize(*target), + }, + TerminatorKind::SwitchInt { discr, targets } => { + rustc_middle::mir::TerminatorKind::SwitchInt { + discr: discr.internal_mir(tcx), + targets: targets.internal_mir(tcx), + } + } + TerminatorKind::Resume => rustc_middle::mir::TerminatorKind::UnwindResume, + TerminatorKind::Abort => rustc_middle::mir::TerminatorKind::UnwindTerminate( + rustc_middle::mir::UnwindTerminateReason::Abi, + ), + TerminatorKind::Return => rustc_middle::mir::TerminatorKind::Return, + TerminatorKind::Unreachable => rustc_middle::mir::TerminatorKind::Unreachable, + TerminatorKind::Drop { place, target, unwind } => { + rustc_middle::mir::TerminatorKind::Drop { + place: internal(tcx, place), + target: rustc_middle::mir::BasicBlock::from_usize(*target), + unwind: unwind.internal_mir(tcx), + replace: false, + } + } + TerminatorKind::Call { func, args, destination, target, unwind } => { + rustc_middle::mir::TerminatorKind::Call { + func: func.internal_mir(tcx), + args: Box::from_iter( + args.iter().map(|arg| { + rustc_span::source_map::dummy_spanned(arg.internal_mir(tcx)) + }), + ), + destination: internal(tcx, destination), + target: target.map(|basic_block_idx| { + rustc_middle::mir::BasicBlock::from_usize(basic_block_idx) + }), + unwind: unwind.internal_mir(tcx), + call_source: rustc_middle::mir::CallSource::Normal, + fn_span: rustc_span::DUMMY_SP, + } + } + TerminatorKind::Assert { cond, expected, msg, target, unwind } => { + rustc_middle::mir::TerminatorKind::Assert { + cond: cond.internal_mir(tcx), + expected: *expected, + msg: Box::new(msg.internal_mir(tcx)), + target: rustc_middle::mir::BasicBlock::from_usize(*target), + unwind: unwind.internal_mir(tcx), + } + } + TerminatorKind::InlineAsm { .. 
} => todo!(), + } + } +} + +impl RustcInternalMir for Terminator { + type T<'tcx> = rustc_middle::mir::Terminator<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::Terminator { + source_info: rustc_middle::mir::SourceInfo::outermost(internal(tcx, self.span)), + kind: self.kind.internal_mir(tcx), + } + } +} + +impl RustcInternalMir for LocalDecl { + type T<'tcx> = rustc_middle::mir::LocalDecl<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::LocalDecl { + mutability: internal(tcx, self.mutability), + local_info: rustc_middle::mir::ClearCrossCrate::Set(Box::new( + rustc_middle::mir::LocalInfo::Boring, + )), + ty: internal(tcx, self.ty), + user_ty: None, + source_info: rustc_middle::mir::SourceInfo::outermost(internal(tcx, self.span)), + } + } +} + +impl RustcInternalMir for Body { + type T<'tcx> = rustc_middle::mir::Body<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + let internal_basic_blocks = rustc_index::IndexVec::from_raw( + self.blocks + .iter() + .map(|stable_basic_block| rustc_middle::mir::BasicBlockData { + statements: stable_basic_block + .statements + .iter() + .map(|statement| statement.internal_mir(tcx)) + .collect(), + terminator: Some(stable_basic_block.terminator.internal_mir(tcx)), + is_cleanup: false, + }) + .collect(), + ); + let local_decls = rustc_index::IndexVec::from_raw( + self.locals().iter().map(|local_decl| local_decl.internal_mir(tcx)).collect(), + ); + rustc_middle::mir::Body::new( + rustc_middle::mir::MirSource::item(rustc_hir::def_id::CRATE_DEF_ID.to_def_id()), + internal_basic_blocks, + rustc_index::IndexVec::new(), + local_decls, + rustc_index::IndexVec::new(), + self.arg_locals().len(), + Vec::new(), + rustc_span::DUMMY_SP, + None, + None, + ) + } +} diff --git a/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs b/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs index 82ff25bb2442..cc748c39a7f5 100644 --- a/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs +++ b/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs @@ -13,6 +13,9 @@ use crate::kani_middle::transform::body::{ CheckType, InsertPosition, MutableBody, SourceInstruction, }; use crate::kani_middle::transform::check_uninit::PointeeInfo; +use crate::kani_middle::transform::check_uninit::{ + get_mem_init_fn_def, mk_layout_operand, resolve_mem_init_fn, PointeeLayout, +}; use crate::kani_middle::transform::check_values::{build_limits, ty_validity_per_offset}; use crate::kani_middle::transform::{TransformPass, TransformationType}; use crate::kani_queries::QueryDb; @@ -29,10 +32,6 @@ use std::fmt::Debug; use strum_macros::AsRefStr; use tracing::trace; -use super::check_uninit::{ - get_mem_init_fn_def, mk_layout_operand, resolve_mem_init_fn, PointeeLayout, -}; - /// Generate the body for a few Kani intrinsics. 
#[derive(Debug)] pub struct IntrinsicGeneratorPass { diff --git a/kani-compiler/src/kani_middle/transform/mod.rs b/kani-compiler/src/kani_middle/transform/mod.rs index 4f3125e59868..2d963cd1d6eb 100644 --- a/kani-compiler/src/kani_middle/transform/mod.rs +++ b/kani-compiler/src/kani_middle/transform/mod.rs @@ -19,7 +19,7 @@ use crate::kani_middle::codegen_units::CodegenUnit; use crate::kani_middle::reachability::CallGraph; use crate::kani_middle::transform::body::CheckType; -use crate::kani_middle::transform::check_uninit::UninitPass; +use crate::kani_middle::transform::check_uninit::{DelayedUbPass, UninitPass}; use crate::kani_middle::transform::check_values::ValidValuePass; use crate::kani_middle::transform::contracts::{AnyModifiesPass, FunctionWithContractPass}; use crate::kani_middle::transform::kani_intrinsics::IntrinsicGeneratorPass; @@ -32,11 +32,14 @@ use stable_mir::mir::Body; use std::collections::HashMap; use std::fmt::Debug; +pub use internal_mir::RustcInternalMir; + pub(crate) mod body; mod check_uninit; mod check_values; mod contracts; mod dump_mir_pass; +mod internal_mir; mod kani_intrinsics; mod stubs; @@ -192,6 +195,7 @@ pub struct GlobalPasses { impl GlobalPasses { pub fn new(queries: &QueryDb, tcx: TyCtxt) -> Self { let mut global_passes = GlobalPasses { global_passes: vec![] }; + global_passes.add_global_pass(queries, DelayedUbPass::new(CheckType::new_assert(tcx))); global_passes.add_global_pass(queries, DumpMirPass::new(tcx)); global_passes } diff --git a/kani-compiler/src/main.rs b/kani-compiler/src/main.rs index d2f8cf17e9e7..7f1fb144a09b 100644 --- a/kani-compiler/src/main.rs +++ b/kani-compiler/src/main.rs @@ -27,6 +27,7 @@ extern crate rustc_index; extern crate rustc_interface; extern crate rustc_metadata; extern crate rustc_middle; +extern crate rustc_mir_dataflow; extern crate rustc_session; extern crate rustc_smir; extern crate rustc_span; diff --git a/tests/expected/uninit/access-padding-via-cast/expected b/tests/expected/uninit/access-padding-via-cast/expected index e02883b26cdf..12c5c0a4a439 100644 --- a/tests/expected/uninit/access-padding-via-cast/expected +++ b/tests/expected/uninit/access-padding-via-cast/expected @@ -1,4 +1,4 @@ -Failed Checks: Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB. +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*mut [u8; 4]` VERIFICATION:- FAILED diff --git a/tests/expected/uninit/delayed-ub-transmute/delayed-ub-transmute.rs b/tests/expected/uninit/delayed-ub-transmute/delayed-ub-transmute.rs deleted file mode 100644 index df769e39a8b2..000000000000 --- a/tests/expected/uninit/delayed-ub-transmute/delayed-ub-transmute.rs +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright Kani Contributors -// SPDX-License-Identifier: Apache-2.0 OR MIT -// kani-flags: -Z uninit-checks - -/// Checks that Kani rejects mutable pointer casts between types of different padding. -#[kani::proof] -fn invalid_value() { - unsafe { - let mut value: u128 = 0; - let ptr: *mut (u8, u32, u64) = std::mem::transmute(&mut value as *mut _); - *ptr = (4, 4, 4); // This assignment itself does not cause UB... - let c: u128 = value; // ...but this reads a padding value! 
-    }
-}
diff --git a/tests/expected/uninit/delayed-ub-transmute/expected b/tests/expected/uninit/delayed-ub-transmute/expected
deleted file mode 100644
index e02883b26cdf..000000000000
--- a/tests/expected/uninit/delayed-ub-transmute/expected
+++ /dev/null
@@ -1,5 +0,0 @@
-Failed Checks: Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB.
-
-VERIFICATION:- FAILED
-
-Complete - 0 successfully verified harnesses, 1 failures, 1 total.
\ No newline at end of file
diff --git a/tests/expected/uninit/delayed-ub/delayed-ub.rs b/tests/expected/uninit/delayed-ub/delayed-ub.rs
index bfed0a1f39a1..feee4bcd161f 100644
--- a/tests/expected/uninit/delayed-ub/delayed-ub.rs
+++ b/tests/expected/uninit/delayed-ub/delayed-ub.rs
@@ -2,13 +2,165 @@
 // SPDX-License-Identifier: Apache-2.0 OR MIT
 // kani-flags: -Z ghost-state -Z uninit-checks
 
-/// Checks that Kani rejects mutable pointer casts between types of different padding.
+//! Checks that Kani catches instances of delayed UB.
+
+/// Delayed UB via casted mutable pointer write.
 #[kani::proof]
-fn invalid_value() {
+fn delayed_ub() {
     unsafe {
         let mut value: u128 = 0;
+        // Cast between two pointers of different padding.
         let ptr = &mut value as *mut _ as *mut (u8, u32, u64);
-        *ptr = (4, 4, 4); // This assignment itself does not cause UB...
-        let c: u128 = value; // ...but this reads a padding value!
+        *ptr = (4, 4, 4);
+        let c: u128 = value; // UB: This reads a padding value!
+    }
+}
+
+/// Delayed UB via transmuted mutable pointer write.
+#[kani::proof]
+fn delayed_ub_transmute() {
+    unsafe {
+        let mut value: u128 = 0;
+        // Transmute between two pointers of different padding.
+        let ptr: *mut (u8, u32, u64) = std::mem::transmute(&mut value as *mut _);
+        *ptr = (4, 4, 4);
+        let c: u128 = value; // UB: This reads a padding value!
+    }
+}
+
+static mut VALUE: u128 = 42;
+
+/// Delayed UB via mutable pointer write into a static.
+#[kani::proof]
+fn delayed_ub_static() {
+    unsafe {
+        let v_ref = &mut VALUE;
+        // Cast a reference to the static into a pointer of different padding.
+        let ptr = &mut VALUE as *mut _ as *mut (u8, u32, u64);
+        *ptr = (4, 4, 4);
+        assert!(*v_ref > 0); // UB: This reads a padding value!
+    }
+}
+
+/// Helper to launder the pointer while keeping the address.
+unsafe fn launder(ptr: *mut u128) -> *mut u128 {
+    let a = ptr;
+    let b = a as *const u128;
+    let c: *mut i128 = std::mem::transmute(b);
+    let d = c as usize;
+    let e = d + 1;
+    let f = e - 1;
+    return f as *mut u128;
+}
+
+/// Delayed UB via mutable pointer write with additional laundering.
+#[kani::proof]
+fn delayed_ub_laundered() {
+    unsafe {
+        let mut value: u128 = 0;
+        let ptr = &mut value as *mut u128;
+        // Pass the pointer around in an attempt to remove the association.
+        let ptr = launder(ptr) as *mut (u8, u32, u64);
+        *ptr = (4, 4, 4);
+        assert!(value > 0); // UB: This reads a padding value!
+    }
+}
+
+/// Delayed UB via mutable pointer write with additional laundering, but via a closure.
+#[kani::proof]
+fn delayed_ub_closure_laundered() {
+    unsafe {
+        let mut value: u128 = 0;
+        let ptr = &mut value as *mut u128;
+        // Add extra args to test spread_arg.
+        let launder = |arg1: bool, arg2: bool, arg3: bool, ptr: *mut u128| -> *mut u128 {
+            let a = ptr;
+            let b = a as *const u128;
+            let c: *mut i128 = std::mem::transmute(b);
+            let d = c as usize;
+            let e = d + 1;
+            let f = e - 1;
+            return f as *mut u128;
+        };
+        // Pass the pointer around in an attempt to remove the association.
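+        // Unlike `delayed_ub_laundered` above, the laundering here happens behind a closure
+        // call with extra leading arguments (the `spread_arg` case), so the points-to
+        // analysis must recurse into the closure and keep `ptr` associated with `value`.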
+        let ptr = launder(false, true, false, ptr) as *mut (u8, u32, u64);
+        *ptr = (4, 4, 4);
+        assert!(value > 0); // UB: This reads a padding value!
+    }
+}
+
+/// Delayed UB via mutable pointer write with additional laundering, but via closure captures.
+#[kani::proof]
+fn delayed_ub_closure_capture_laundered() {
+    unsafe {
+        let mut value: u128 = 0;
+        let ptr = &mut value as *mut u128;
+        // Add extra args to test spread_arg.
+        let launder = |arg1: bool, arg2: bool, arg3: bool| -> *mut u128 {
+            let a = ptr;
+            let b = a as *const u128;
+            let c: *mut i128 = std::mem::transmute(b);
+            let d = c as usize;
+            let e = d + 1;
+            let f = e - 1;
+            return f as *mut u128;
+        };
+        // Pass the pointer around in an attempt to remove the association.
+        let ptr = launder(false, true, false) as *mut (u8, u32, u64);
+        *ptr = (4, 4, 4);
+        assert!(value > 0); // UB: This reads a padding value!
+    }
+}
+
+/// Delayed UB via mutable pointer write using `copy_nonoverlapping` under the hood.
+#[kani::proof]
+fn delayed_ub_copy() {
+    unsafe {
+        let mut value: u128 = 0;
+        let ptr = &mut value as *mut _ as *mut (u8, u32, u64);
+        // Use `std::ptr::write`, which is backed by `copy_nonoverlapping` under the hood,
+        // in an attempt to remove the taint.
+        std::ptr::write(ptr, (4, 4, 4));
+        assert!(value > 0); // UB: This reads a padding value!
+    }
+}
+
+struct S {
+    u: U,
+}
+
+struct U {
+    value1: u128,
+    value2: u64,
+    value3: u32,
+}
+
+struct Inner<T>(*mut T);
+
+/// Delayed UB via mutable pointer write into inner fields of structs.
+#[kani::proof]
+fn delayed_ub_structs() {
+    unsafe {
+        // Create a convoluted struct.
+        let mut s: S = S { u: U { value1: 0, value2: 0, value3: 0 } };
+        // Get a pointer to an inner field of the struct. Then, cast between two pointers of
+        // different padding.
+        let inner = Inner(&mut s.u.value2 as *mut _);
+        let inner_cast = Inner(inner.0 as *mut (u8, u32));
+        let ptr = inner_cast.0;
+        *ptr = (4, 4);
+        let u: U = s.u; // UB: This reads a padding value inside the inner struct!
+    }
+}
+
+/// Delayed UB via mutable pointer write into a slice element.
+#[kani::proof]
+fn delayed_ub_slices() {
+    unsafe {
+        // Create an array.
+        let mut arr = [0u128; 4];
+        // Get a pointer to a part of the array.
+        let ptr = &mut arr[0..2][0..1][0] as *mut _ as *mut (u8, u32);
+        *ptr = (4, 4);
+        let arr_copy = arr; // UB: This reads a padding value inside the array!
     }
 }
diff --git a/tests/expected/uninit/delayed-ub/expected b/tests/expected/uninit/delayed-ub/expected
index e02883b26cdf..46b6ababe85d 100644
--- a/tests/expected/uninit/delayed-ub/expected
+++ b/tests/expected/uninit/delayed-ub/expected
@@ -1,5 +1,47 @@
-Failed Checks: Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB.
+delayed_ub_slices.assertion.4\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `[u128; 4]`"
 
-VERIFICATION:- FAILED
+delayed_ub_structs.assertion.2\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `U`"
 
-Complete - 0 successfully verified harnesses, 1 failures, 1 total.
\ No newline at end of file
+delayed_ub_copy.assertion.1\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`"
+
+delayed_ub_closure_capture_laundered.assertion.2\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`"
+
+delayed_ub_closure_laundered.assertion.2\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`"
+
+delayed_ub_laundered.assertion.2\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`"
+
+delayed_ub_static.assertion.4\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`"
+
+delayed_ub_transmute.assertion.2\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`"
+
+delayed_ub.assertion.2\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`"
+
+Summary:
+Verification failed for - delayed_ub_slices
+Verification failed for - delayed_ub_structs
+Verification failed for - delayed_ub_copy
+Verification failed for - delayed_ub_closure_capture_laundered
+Verification failed for - delayed_ub_closure_laundered
+Verification failed for - delayed_ub_laundered
+Verification failed for - delayed_ub_static
+Verification failed for - delayed_ub_transmute
+Verification failed for - delayed_ub
+Complete - 0 successfully verified harnesses, 9 failures, 9 total.
diff --git a/tests/expected/uninit/intrinsics/expected b/tests/expected/uninit/intrinsics/expected
index ffa98b6f1140..cf34d305608b 100644
--- a/tests/expected/uninit/intrinsics/expected
+++ b/tests/expected/uninit/intrinsics/expected
@@ -1,64 +1,46 @@
-Checking harness check_typed_swap_safe...
+std::ptr::read::<std::mem::MaybeUninit<u8>>.assertion.1\
+ - Status: FAILURE\
+ - Description: "Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit<u8>."
 
-Failed Checks: Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB.
+std::ptr::read::<std::mem::MaybeUninit<u8>>.assertion.2\
+ - Status: FAILURE\
+ - Description: "Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit<u8>."
 
-VERIFICATION:- FAILED
+std::ptr::write::<std::mem::MaybeUninit<u8>>.assertion.1\
+ - Status: FAILURE\
+ - Description: "Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit<u8>."
 
-Checking harness check_typed_swap...
+std::ptr::write::<std::mem::MaybeUninit<u8>>.assertion.2\
+ - Status: FAILURE\
+ - Description: "Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit<u8>."
 
-Failed Checks: Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB.
+check_typed_swap.assertion.1\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*mut u8`"
 
-Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*mut u8`
+check_typed_swap.assertion.2\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*mut u8`"
 
-Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*mut u8`
+check_volatile_load.assertion.1\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`"
 
-VERIFICATION:- FAILED
+check_compare_bytes.assertion.1\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`"
 
-Checking harness check_volatile_store_and_load_safe...
+check_compare_bytes.assertion.2\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`"
 
-VERIFICATION:- SUCCESSFUL
+std::intrinsics::copy::<u8>.assertion.1\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`"
 
-Checking harness check_volatile_load...
-
-Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`
-
-VERIFICATION:- FAILED
-
-Checking harness check_write_bytes_safe...
-
-VERIFICATION:- SUCCESSFUL
-
-Checking harness check_compare_bytes_safe...
-
-VERIFICATION:- SUCCESSFUL
-
-Checking harness check_compare_bytes...
-
-Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`
-
-Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`
-
-VERIFICATION:- FAILED
-
-Checking harness check_copy_safe...
-
-VERIFICATION:- SUCCESSFUL
-
-Checking harness check_copy...
-
-Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`
-
-VERIFICATION:- FAILED
-
-Checking harness check_copy_nonoverlapping_safe...
-
-VERIFICATION:- SUCCESSFUL
-
-Checking harness check_copy_nonoverlapping...
-
-Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`
-
-VERIFICATION:- FAILED
+std::intrinsics::copy_nonoverlapping::<u8>.assertion.1\
+ - Status: FAILURE\
+ - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`"
 
 Summary:
 Verification failed for - check_typed_swap_safe