From 14133c6be14a14e8f61175a40655f92b2cc2aef2 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Tue, 6 Aug 2024 16:24:52 -0700 Subject: [PATCH 1/6] analyze: dataflow: use trace! to print assignment instead of debug! --- c2rust-analyze/src/dataflow/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/c2rust-analyze/src/dataflow/mod.rs b/c2rust-analyze/src/dataflow/mod.rs index bf434f820..2d4d1e0ed 100644 --- a/c2rust-analyze/src/dataflow/mod.rs +++ b/c2rust-analyze/src/dataflow/mod.rs @@ -4,7 +4,7 @@ use crate::context::{AnalysisCtxt, Assignment, FlagSet, PermissionSet, PointerId use crate::pointee_type::PointeeTypes; use crate::pointer_id::{GlobalPointerTable, PointerTable}; use crate::recent_writes::RecentWrites; -use log::debug; +use log::{debug, trace}; use rustc_middle::mir::Body; mod type_check; @@ -71,9 +71,9 @@ impl DataflowConstraints { for c in &self.constraints { debug!(" {:?}", c); } - debug!("hypothesis:"); + trace!("hypothesis:"); for (id, p) in hypothesis.iter() { - debug!(" {}: {:?}", id, p); + trace!(" {}: {:?}", id, p); } struct PropagatePerms; From 1f25acd8b6a58f860cb0cf853597fb6724bc6abe Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 21 Aug 2024 16:43:44 -0700 Subject: [PATCH 2/6] analyze: rewrite: add mutbl field to RvalueDesc --- c2rust-analyze/src/borrowck/mod.rs | 14 ++++++++++---- c2rust-analyze/src/context.rs | 14 ++++++++++---- c2rust-analyze/src/dataflow/type_check.rs | 6 +++++- c2rust-analyze/src/pointee_type/type_check.rs | 5 ++++- c2rust-analyze/src/util.rs | 14 ++++++++++++++ 5 files changed, 43 insertions(+), 10 deletions(-) diff --git a/c2rust-analyze/src/borrowck/mod.rs b/c2rust-analyze/src/borrowck/mod.rs index 3553c04a8..bf66e90ad 100644 --- a/c2rust-analyze/src/borrowck/mod.rs +++ b/c2rust-analyze/src/borrowck/mod.rs @@ -164,12 +164,18 @@ pub fn borrowck_mir<'tcx>( }); let ptr = match stmt.kind { StatementKind::Assign(ref x) => match describe_rvalue(&x.1) { - Some(RvalueDesc::Project { base, proj: _ }) => acx + Some(RvalueDesc::Project { + base, + proj: _, + mutbl: _, + }) => acx .ptr_of(base) .unwrap_or_else(|| panic!("missing pointer ID for {:?}", base)), - Some(RvalueDesc::AddrOfLocal { local, proj: _ }) => { - acx.addr_of_local[local] - } + Some(RvalueDesc::AddrOfLocal { + local, + proj: _, + mutbl: _, + }) => acx.addr_of_local[local], None => panic!("loan {:?} was issued by unknown rvalue {:?}?", loan, x.1), }, _ => panic!("loan {:?} was issued by non-assign stmt {:?}?", loan, stmt), diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index 14e906773..0857831dd 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -1157,7 +1157,11 @@ impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> { let ty = rv.ty(self, self.tcx()); if matches!(ty.kind(), TyKind::Ref(..) | TyKind::RawPtr(..)) { let (pointee_lty, proj, ptr) = match desc { - RvalueDesc::Project { base, proj } => { + RvalueDesc::Project { + base, + proj, + mutbl: _, + } => { let base_lty = self.type_of(base); debug!( "rvalue = {:?}, desc = {:?}, base_lty = {:?}", @@ -1169,9 +1173,11 @@ impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> { base_lty.label, ) } - RvalueDesc::AddrOfLocal { local, proj } => { - (self.type_of(local), proj, self.addr_of_local[local]) - } + RvalueDesc::AddrOfLocal { + local, + proj, + mutbl: _, + } => (self.type_of(local), proj, self.addr_of_local[local]), }; let mut pointee_lty = pointee_lty; diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs index fa7647e36..74a87db46 100644 --- a/c2rust-analyze/src/dataflow/type_check.rs +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -216,7 +216,11 @@ impl<'tcx> TypeChecker<'tcx, '_> { if let Some(desc) = rv_desc { match desc { - RvalueDesc::Project { base, proj: _ } => { + RvalueDesc::Project { + base, + proj: _, + mutbl: _, + } => { // TODO: mutability should probably depend on mutability of the output ref/ptr self.visit_place_ref(base, Mutability::Not); } diff --git a/c2rust-analyze/src/pointee_type/type_check.rs b/c2rust-analyze/src/pointee_type/type_check.rs index d5ed9c69d..3d9200aa6 100644 --- a/c2rust-analyze/src/pointee_type/type_check.rs +++ b/c2rust-analyze/src/pointee_type/type_check.rs @@ -87,7 +87,10 @@ impl<'tcx> TypeChecker<'tcx, '_> { pub fn visit_rvalue(&mut self, rv: &Rvalue<'tcx>, lty: LTy<'tcx>) { trace!("visit_rvalue({rv:?}, {lty:?})"); - if let Some(RvalueDesc::Project { base, proj: &[] }) = describe_rvalue(rv) { + if let Some(RvalueDesc::Project { + base, proj: &[], .. + }) = describe_rvalue(rv) + { // Special case for no-op projections like `&*p`. Since the pointer is passed through // unchanged, we don't require the pointee type to actually match the type used for the // paired deref and address-of operations. diff --git a/c2rust-analyze/src/util.rs b/c2rust-analyze/src/util.rs index 3212f3997..1239cae6f 100644 --- a/c2rust-analyze/src/util.rs +++ b/c2rust-analyze/src/util.rs @@ -30,6 +30,8 @@ pub enum RvalueDesc<'tcx> { base: PlaceRef<'tcx>, /// The projection applied to the pointer. This contains no `Deref` projections. proj: &'tcx [PlaceElem<'tcx>], + /// Mutability of the resulting reference. + mutbl: Mutability, }, /// The address of a local or one of its fields, such as `&x.y`. The rvalue is split into a /// base local (in this case `x`) and a projection (`.y`). The `&` is implicit. @@ -37,6 +39,8 @@ pub enum RvalueDesc<'tcx> { local: Local, /// The projection applied to the local. This contains no `Deref` projections. proj: &'tcx [PlaceElem<'tcx>], + /// Mutability of the resulting reference. + mutbl: Mutability, }, } @@ -46,10 +50,18 @@ pub fn describe_rvalue<'tcx>(rv: &Rvalue<'tcx>) -> Option> { Operand::Move(pl) | Operand::Copy(pl) => RvalueDesc::Project { base: pl.as_ref(), proj: &[], + // This is an rvalue of an `Assign` statement, so it's always in a non-mutable + // position. + mutbl: Mutability::Not, }, Operand::Constant(_) => return None, }, Rvalue::Ref(_, _, pl) | Rvalue::AddressOf(_, pl) => { + let mutbl = match *rv { + Rvalue::Ref(_, kind, _) => kind.to_mutbl_lossy(), + Rvalue::AddressOf(mutbl, _) => mutbl, + _ => unreachable!(), + }; let projection = &pl.projection[..]; match projection .iter() @@ -63,6 +75,7 @@ pub fn describe_rvalue<'tcx>(rv: &Rvalue<'tcx>) -> Option> { projection: &projection[..i], }, proj: &projection[i + 1..], + mutbl, } } None => { @@ -70,6 +83,7 @@ pub fn describe_rvalue<'tcx>(rv: &Rvalue<'tcx>) -> Option> { RvalueDesc::AddrOfLocal { local: pl.local, proj: projection, + mutbl, } } } From dc13392df95d379d3e1a803e00e677e7bbe74a55 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 21 Aug 2024 16:48:51 -0700 Subject: [PATCH 3/6] analyze: dataflow: require WRITE for `&mut *p` --- c2rust-analyze/src/dataflow/type_check.rs | 10 +++++++--- c2rust-analyze/tests/filecheck/non_null.rs | 2 +- c2rust-analyze/tests/filecheck/non_null_force.rs | 8 ++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs index 74a87db46..2500e729d 100644 --- a/c2rust-analyze/src/dataflow/type_check.rs +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -219,10 +219,14 @@ impl<'tcx> TypeChecker<'tcx, '_> { RvalueDesc::Project { base, proj: _, - mutbl: _, + mutbl, } => { - // TODO: mutability should probably depend on mutability of the output ref/ptr - self.visit_place_ref(base, Mutability::Not); + self.visit_place_ref(base, mutbl); + let base_ptr = self.acx.type_of(base).label; + // Note that even non-ptr copy operations are treated as no-op `Project`s. + if !base_ptr.is_none() { + self.record_access(base_ptr, mutbl); + } } RvalueDesc::AddrOfLocal { .. } => {} } diff --git a/c2rust-analyze/tests/filecheck/non_null.rs b/c2rust-analyze/tests/filecheck/non_null.rs index 06d1da8b2..c2d534c5b 100644 --- a/c2rust-analyze/tests/filecheck/non_null.rs +++ b/c2rust-analyze/tests/filecheck/non_null.rs @@ -23,7 +23,7 @@ fn g(cond: bool) { // CHECK-LABEL: final labeling for "h" fn h(cond: bool) { let x = 1_i32; - // CHECK: ([[@LINE+1]]: y): {{.*}}, type = UNIQUE | NON_NULL | STACK# + // CHECK: ([[@LINE+1]]: y): {{.*}}, type = READ | UNIQUE | NON_NULL | STACK# let y = ptr::addr_of!(x); // CHECK: ([[@LINE+1]]: z): {{.*}}, type = UNIQUE | STACK# let z = if cond { diff --git a/c2rust-analyze/tests/filecheck/non_null_force.rs b/c2rust-analyze/tests/filecheck/non_null_force.rs index 171d5283b..1f28e0ce0 100644 --- a/c2rust-analyze/tests/filecheck/non_null_force.rs +++ b/c2rust-analyze/tests/filecheck/non_null_force.rs @@ -6,26 +6,26 @@ use std::ptr; // CHECK-LABEL: final labeling for "f" fn f(cond: bool) { let x = 1_i32; - // CHECK: ([[@LINE+1]]: mut y): {{.*}}, type = UNIQUE | STACK# + // CHECK: ([[@LINE+1]]: mut y): {{.*}}, type = READ | UNIQUE | STACK# let mut y = ptr::addr_of!(x); if cond { y = 0 as *const _; } // The expression `y` is considered nullable even though it's passed for argument `p` of `g`, // which is forced to be `NON_NULL`. - // CHECK: ([[@LINE+1]]: y): {{.*}}, type = UNIQUE | STACK# + // CHECK: ([[@LINE+1]]: y): {{.*}}, type = READ | UNIQUE | STACK# g(cond, y); } // CHECK-LABEL: final labeling for "g" // `p` should be non-null, as it's forced to be by the attribute. This emulates the "unsound" PDG // case, where a variable is forced to stay `NON_NULL` even though a null possibly flows into it. -// CHECK: ([[@LINE+2]]: p): {{.*}}, type = UNIQUE | NON_NULL | STACK# +// CHECK: ([[@LINE+2]]: p): {{.*}}, type = READ | UNIQUE | NON_NULL | STACK# #[c2rust_analyze_test::force_non_null_args] fn g(cond: bool, p: *const i32) { // `q` is not forced to be `NON_NULL`, so it should be inferred nullable due to the null // assignment below. - // CHECK: ([[@LINE+1]]: mut q): {{.*}}, type = UNIQUE | STACK# + // CHECK: ([[@LINE+1]]: mut q): {{.*}}, type = READ | UNIQUE | STACK# let mut q = p; if cond { q = 0 as *const _; From 82c97bd8b45fc162886c5120ff5ea93fe63fa540 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 4 Sep 2024 13:05:48 -0700 Subject: [PATCH 4/6] analyze: add script for collecting successfully rewritten defs --- .../scripts/extract_working_defs.py | 115 ++++++++++++++++++ c2rust-analyze/src/analyze.rs | 10 ++ c2rust-analyze/src/main.rs | 11 ++ 3 files changed, 136 insertions(+) create mode 100644 c2rust-analyze/scripts/extract_working_defs.py diff --git a/c2rust-analyze/scripts/extract_working_defs.py b/c2rust-analyze/scripts/extract_working_defs.py new file mode 100644 index 000000000..0fab4986c --- /dev/null +++ b/c2rust-analyze/scripts/extract_working_defs.py @@ -0,0 +1,115 @@ +""" +Script for extracting defs on which rewriting succeeded. Currently, this is +specific to lighttpd_rust_amalgamated, but it could be generalized to work on +other projects as well. The script identifies "working defs" by reading the +log of a `c2rust-analyze` run and collecting defs that have no reported +"analysis failed" errors and are not listed in the `--fixed-defs-list`. It +then locates each working def in the rewritten code by searching for "start/end +of def" comments and prints the code for each working def to stdout. +""" +from dataclasses import dataclass +import re +import subprocess +import sys + +ANALYSIS_FAILED_RE = re.compile('analysis of (DefId\([^)]*\)) failed:.*$') +DEF_SPAN_RE = re.compile('(DefId\([^)]*\)) @ (.*)$') +# src/main.rs:422:1: 422:49 (#0) +SPAN_RE = re.compile('([^:]+):([0-9]+):([0-9]+): ([0-9]+):([0-9]+) \(#[0-9]+\)') + +START_OF_DEF_RE = re.compile(' *// [0-9]*: [^:]*: start of def (DefId\([^)]*\))$') +END_OF_DEF_RE = re.compile(' *// [0-9]*: [^:]*: end of def (DefId\([^)]*\))$') + +HEADER = ''' +#![feature(rustc_private)] +#![feature(register_tool)] +#![register_tool(c2rust_analyze_test)] + +extern crate libc; +extern crate core; + +pub type __uint32_t = libc::c_uint; +pub type __intmax_t = libc::c_long; +pub type __uintmax_t = libc::c_ulong; +pub type size_t = libc::c_ulong; +pub type uint32_t = __uint32_t; +pub type uint_fast32_t = libc::c_ulong; +pub type intmax_t = __intmax_t; +pub type uintmax_t = __uintmax_t; + +#[no_mangle] +#[cold] +#[c2rust_analyze_test::fixed_signature] +pub extern "C" fn ck_assert_failed( +) -> ! { + panic!(); +} +''' + +def main(): + fixed_defs_path, log_path = sys.argv[1:] + + error_summary = subprocess.run(('grep', '-A999999', '^error summary:$', log_path), + check=True, capture_output=True, text=True).stdout + error_defs = set() + for line in error_summary.splitlines(): + line = line.strip() + m = ANALYSIS_FAILED_RE.match(line) + if m is not None: + error_defs.add(m.group(1)) + else: + print('bad line %r' % (line,), file=sys.stderr) + + unfixed_defs = set() + for line in open(fixed_defs_path): + line = line.strip() + if line.startswith('#'): + line = line[1:].strip() + print('unfixed def: %r' % line, file=sys.stderr) + unfixed_defs.add(line) + + rewritten_defs = unfixed_defs - error_defs + + f = open('../../lighttpd-rust/lighttpd_rust_amalgamated/src/main.new.rs') + lines = list(f) + start_of_def = {} + end_of_def = {} + last_non_prefix_line = 0 + prev_last_non_prefix_line = 0 + for i, line in enumerate(lines): + m = START_OF_DEF_RE.match(line) + if m is not None: + start_of_def[m.group(1)] = prev_last_non_prefix_line + 1 + m = END_OF_DEF_RE.match(line) + if m is not None: + end_of_def[m.group(1)] = i + line = line.strip() + if not line.startswith('//') and not line.startswith('#'): + prev_last_non_prefix_line = last_non_prefix_line + last_non_prefix_line = i + + + print(HEADER) + + for did in sorted(rewritten_defs): + print('\n// BEGIN %s' % did) + if did not in start_of_def: + continue + start = start_of_def[did] + end = end_of_def[did] + 1 + in_prefix_comments = True + for line in lines[start:end]: + if in_prefix_comments: + if not line.strip().startswith('//'): + in_prefix_comments = False + else: + continue + sys.stdout.write(line) + print('// END %s' % did) + + print('extracted %d working defs:' % len(rewritten_defs), file=sys.stderr) + for did in sorted(rewritten_defs): + print(did, file=sys.stderr) + +if __name__ == '__main__': + main() diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 18d407a45..3b5a11a2d 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -1511,6 +1511,16 @@ fn run2<'tcx>( info.acx_data.set(acx.into_data()); } + // Annotate begin/end of each def. This is used by extract_working_defs.py to locate defs that + // were rewritten successfully. + if env::var("C2RUST_ANALYZE_ANNOTATE_DEF_SPANS").as_deref() == Ok("1") { + for ldid in tcx.hir_crate_items(()).definitions() { + let span = tcx.source_span(ldid); + ann.emit(span.shrink_to_lo(), format_args!("start of def {ldid:?}")); + ann.emit(span.shrink_to_hi(), format_args!("end of def {ldid:?}")); + } + } + // Print results for `static` items. debug!("\nfinal labeling for static items:"); let lcx1 = crate::labeled_ty::LabeledTyCtxt::new(tcx); diff --git a/c2rust-analyze/src/main.rs b/c2rust-analyze/src/main.rs index 219fbafbe..ce392588e 100644 --- a/c2rust-analyze/src/main.rs +++ b/c2rust-analyze/src/main.rs @@ -98,6 +98,12 @@ struct Args { #[clap(long)] use_manual_shims: bool, + /// Add "start/end of def" annotations as comments around each definition. These annotations + /// are used by `scripts/extract_working_defs.py` to locate specific defs in the rewritten + /// code. + #[clap(long)] + annotate_def_spans: bool, + /// Read a list of defs that should be marked non-rewritable (`FIXED`) from this file path. /// Run `c2rust-analyze` without this option and check the debug output for a full list of defs /// in the crate being analyzed; the file passed to this option should list a subset of those @@ -401,6 +407,7 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> { mut rewrite_mode, rewrite_in_place, use_manual_shims, + annotate_def_spans, fixed_defs_list, force_rewrite_defs_list, skip_pointee_defs_list, @@ -478,6 +485,10 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> { cmd.env("C2RUST_ANALYZE_USE_MANUAL_SHIMS", "1"); } + if annotate_def_spans { + cmd.env("C2RUST_ANALYZE_ANNOTATE_DEF_SPANS", "1"); + } + Ok(()) })?; From 3210b1cf1008bd6283f3fc07c9024c497b749a37 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 5 Sep 2024 09:28:14 -0700 Subject: [PATCH 5/6] analyze: run_pointwise_metrics.sh: run build first to ensure deps exist --- c2rust-analyze/scripts/run_pointwise_metrics.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/c2rust-analyze/scripts/run_pointwise_metrics.sh b/c2rust-analyze/scripts/run_pointwise_metrics.sh index c17306401..5010df242 100755 --- a/c2rust-analyze/scripts/run_pointwise_metrics.sh +++ b/c2rust-analyze/scripts/run_pointwise_metrics.sh @@ -33,6 +33,10 @@ now=$(date +%Y%m%d-%H%M%S) project="$(basename "$MODULE_DIR")" case "$project" in lighttpd_*) + # Make sure the project has been built first. This ensures that the + # `extern` function can find the libraries it needs. + cargo build --manifest-path "$MODULE_DIR/Cargo.toml" + rustc_flags=( --edition 2021 --crate-type rlib From 030e0f8dbc5f96af1e6137136f84af8c54bcb558 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 5 Sep 2024 09:27:50 -0700 Subject: [PATCH 6/6] analyze: fix pointwise_metrics.py crash when denominator is zero --- c2rust-analyze/scripts/pointwise_metrics.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/c2rust-analyze/scripts/pointwise_metrics.py b/c2rust-analyze/scripts/pointwise_metrics.py index 6cb6cf0ed..9459c3726 100644 --- a/c2rust-analyze/scripts/pointwise_metrics.py +++ b/c2rust-analyze/scripts/pointwise_metrics.py @@ -50,17 +50,23 @@ def read_func_errors(f): func_errors[func] = errors return func_errors +def calc_pct(n, d): + if d == 0: + return 0 + else: + return n / d * 100 + pointwise_func_errors = read_func_errors(open(pointwise_log_path)) pointwise_ok = set(func for func, errors in pointwise_func_errors.items() if errors == 0) print('pointwise: %5d/%d functions passed (%.1f%%)' % ( len(pointwise_ok), len(pointwise_func_errors), - len(pointwise_ok) / len(pointwise_func_errors) * 100)) + calc_pct(len(pointwise_ok), len(pointwise_func_errors)))) unmodified_func_errors = read_func_errors(open(unmodified_log_path)) unmodified_ok = set(func for func, errors in unmodified_func_errors.items() if errors == 0) print('unmodified: %5d/%d functions passed (%.1f%%)' % ( len(unmodified_ok), len(unmodified_func_errors), - len(unmodified_ok) / len(unmodified_func_errors) * 100)) + calc_pct(len(unmodified_ok), len(unmodified_func_errors)))) assert len(pointwise_func_errors) == len(unmodified_func_errors) num_total = len(pointwise_func_errors) @@ -69,7 +75,7 @@ def read_func_errors(f): improved = pointwise_ok - unmodified_ok print('improved: %5d/%d functions (%.1f%%)' % ( - len(improved), num_unmodified_bad, len(improved) / num_unmodified_bad * 100)) + len(improved), num_unmodified_bad, calc_pct(len(improved), num_unmodified_bad))) broke = unmodified_ok - pointwise_ok print('broke: %5d/%d functions (%.1f%%)' % ( - len(broke), num_unmodified_ok, len(broke) / num_unmodified_ok * 100)) + len(broke), num_unmodified_ok, calc_pct(len(broke), num_unmodified_ok)))