Skip to content

Commit

Permalink
Merge branch 'analyze-buffer-misc-fixes-2' into sp/cleanup-20240923
Browse files Browse the repository at this point in the history
  • Loading branch information
spernsteiner committed Dec 2, 2024
2 parents 12db490 + 030e0f8 commit 1a418d6
Show file tree
Hide file tree
Showing 13 changed files with 207 additions and 24 deletions.
115 changes: 115 additions & 0 deletions c2rust-analyze/scripts/extract_working_defs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""
Script for extracting defs on which rewriting succeeded. Currently, this is
specific to lighttpd_rust_amalgamated, but it could be generalized to work on
other projects as well. The script identifies "working defs" by reading the
log of a `c2rust-analyze` run and collecting defs that have no reported
"analysis failed" errors and are not listed in the `--fixed-defs-list`. It
then locates each working def in the rewritten code by searching for "start/end
of def" comments and prints the code for each working def to stdout.
"""
from dataclasses import dataclass
import re
import subprocess
import sys

ANALYSIS_FAILED_RE = re.compile('analysis of (DefId\([^)]*\)) failed:.*$')
DEF_SPAN_RE = re.compile('(DefId\([^)]*\)) @ (.*)$')
# src/main.rs:422:1: 422:49 (#0)
SPAN_RE = re.compile('([^:]+):([0-9]+):([0-9]+): ([0-9]+):([0-9]+) \(#[0-9]+\)')

START_OF_DEF_RE = re.compile(' *// [0-9]*: [^:]*: start of def (DefId\([^)]*\))$')
END_OF_DEF_RE = re.compile(' *// [0-9]*: [^:]*: end of def (DefId\([^)]*\))$')

HEADER = '''
#![feature(rustc_private)]
#![feature(register_tool)]
#![register_tool(c2rust_analyze_test)]
extern crate libc;
extern crate core;
pub type __uint32_t = libc::c_uint;
pub type __intmax_t = libc::c_long;
pub type __uintmax_t = libc::c_ulong;
pub type size_t = libc::c_ulong;
pub type uint32_t = __uint32_t;
pub type uint_fast32_t = libc::c_ulong;
pub type intmax_t = __intmax_t;
pub type uintmax_t = __uintmax_t;
#[no_mangle]
#[cold]
#[c2rust_analyze_test::fixed_signature]
pub extern "C" fn ck_assert_failed(
) -> ! {
panic!();
}
'''

def main():
fixed_defs_path, log_path = sys.argv[1:]

error_summary = subprocess.run(('grep', '-A999999', '^error summary:$', log_path),
check=True, capture_output=True, text=True).stdout
error_defs = set()
for line in error_summary.splitlines():
line = line.strip()
m = ANALYSIS_FAILED_RE.match(line)
if m is not None:
error_defs.add(m.group(1))
else:
print('bad line %r' % (line,), file=sys.stderr)

unfixed_defs = set()
for line in open(fixed_defs_path):
line = line.strip()
if line.startswith('#'):
line = line[1:].strip()
print('unfixed def: %r' % line, file=sys.stderr)
unfixed_defs.add(line)

rewritten_defs = unfixed_defs - error_defs

f = open('../../lighttpd-rust/lighttpd_rust_amalgamated/src/main.new.rs')
lines = list(f)
start_of_def = {}
end_of_def = {}
last_non_prefix_line = 0
prev_last_non_prefix_line = 0
for i, line in enumerate(lines):
m = START_OF_DEF_RE.match(line)
if m is not None:
start_of_def[m.group(1)] = prev_last_non_prefix_line + 1
m = END_OF_DEF_RE.match(line)
if m is not None:
end_of_def[m.group(1)] = i
line = line.strip()
if not line.startswith('//') and not line.startswith('#'):
prev_last_non_prefix_line = last_non_prefix_line
last_non_prefix_line = i


print(HEADER)

for did in sorted(rewritten_defs):
print('\n// BEGIN %s' % did)
if did not in start_of_def:
continue
start = start_of_def[did]
end = end_of_def[did] + 1
in_prefix_comments = True
for line in lines[start:end]:
if in_prefix_comments:
if not line.strip().startswith('//'):
in_prefix_comments = False
else:
continue
sys.stdout.write(line)
print('// END %s' % did)

print('extracted %d working defs:' % len(rewritten_defs), file=sys.stderr)
for did in sorted(rewritten_defs):
print(did, file=sys.stderr)

if __name__ == '__main__':
main()
14 changes: 10 additions & 4 deletions c2rust-analyze/scripts/pointwise_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,23 @@ def read_func_errors(f):
func_errors[func] = errors
return func_errors

def calc_pct(n, d):
if d == 0:
return 0
else:
return n / d * 100

pointwise_func_errors = read_func_errors(open(pointwise_log_path))
pointwise_ok = set(func for func, errors in pointwise_func_errors.items() if errors == 0)
print('pointwise: %5d/%d functions passed (%.1f%%)' % (
len(pointwise_ok), len(pointwise_func_errors),
len(pointwise_ok) / len(pointwise_func_errors) * 100))
calc_pct(len(pointwise_ok), len(pointwise_func_errors))))

unmodified_func_errors = read_func_errors(open(unmodified_log_path))
unmodified_ok = set(func for func, errors in unmodified_func_errors.items() if errors == 0)
print('unmodified: %5d/%d functions passed (%.1f%%)' % (
len(unmodified_ok), len(unmodified_func_errors),
len(unmodified_ok) / len(unmodified_func_errors) * 100))
calc_pct(len(unmodified_ok), len(unmodified_func_errors))))

assert len(pointwise_func_errors) == len(unmodified_func_errors)
num_total = len(pointwise_func_errors)
Expand All @@ -69,7 +75,7 @@ def read_func_errors(f):

improved = pointwise_ok - unmodified_ok
print('improved: %5d/%d functions (%.1f%%)' % (
len(improved), num_unmodified_bad, len(improved) / num_unmodified_bad * 100))
len(improved), num_unmodified_bad, calc_pct(len(improved), num_unmodified_bad)))
broke = unmodified_ok - pointwise_ok
print('broke: %5d/%d functions (%.1f%%)' % (
len(broke), num_unmodified_ok, len(broke) / num_unmodified_ok * 100))
len(broke), num_unmodified_ok, calc_pct(len(broke), num_unmodified_ok)))
4 changes: 4 additions & 0 deletions c2rust-analyze/scripts/run_pointwise_metrics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ now=$(date +%Y%m%d-%H%M%S)
project="$(basename "$MODULE_DIR")"
case "$project" in
lighttpd_*)
# Make sure the project has been built first. This ensures that the
# `extern` function can find the libraries it needs.
cargo build --manifest-path "$MODULE_DIR/Cargo.toml"

rustc_flags=(
--edition 2021
--crate-type rlib
Expand Down
10 changes: 10 additions & 0 deletions c2rust-analyze/src/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1511,6 +1511,16 @@ fn run2<'tcx>(
info.acx_data.set(acx.into_data());
}

// Annotate begin/end of each def. This is used by extract_working_defs.py to locate defs that
// were rewritten successfully.
if env::var("C2RUST_ANALYZE_ANNOTATE_DEF_SPANS").as_deref() == Ok("1") {
for ldid in tcx.hir_crate_items(()).definitions() {
let span = tcx.source_span(ldid);
ann.emit(span.shrink_to_lo(), format_args!("start of def {ldid:?}"));
ann.emit(span.shrink_to_hi(), format_args!("end of def {ldid:?}"));
}
}

// Print results for `static` items.
debug!("\nfinal labeling for static items:");
let lcx1 = crate::labeled_ty::LabeledTyCtxt::new(tcx);
Expand Down
14 changes: 10 additions & 4 deletions c2rust-analyze/src/borrowck/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,18 @@ pub fn borrowck_mir<'tcx>(
});
let ptr = match stmt.kind {
StatementKind::Assign(ref x) => match describe_rvalue(&x.1) {
Some(RvalueDesc::Project { base, proj: _ }) => acx
Some(RvalueDesc::Project {
base,
proj: _,
mutbl: _,
}) => acx
.ptr_of(base)
.unwrap_or_else(|| panic!("missing pointer ID for {:?}", base)),
Some(RvalueDesc::AddrOfLocal { local, proj: _ }) => {
acx.addr_of_local[local]
}
Some(RvalueDesc::AddrOfLocal {
local,
proj: _,
mutbl: _,
}) => acx.addr_of_local[local],
None => panic!("loan {:?} was issued by unknown rvalue {:?}?", loan, x.1),
},
_ => panic!("loan {:?} was issued by non-assign stmt {:?}?", loan, stmt),
Expand Down
14 changes: 10 additions & 4 deletions c2rust-analyze/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,11 @@ impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> {
let ty = rv.ty(self, self.tcx());
if matches!(ty.kind(), TyKind::Ref(..) | TyKind::RawPtr(..)) {
let (pointee_lty, proj, ptr) = match desc {
RvalueDesc::Project { base, proj } => {
RvalueDesc::Project {
base,
proj,
mutbl: _,
} => {
let base_lty = self.type_of(base);
debug!(
"rvalue = {:?}, desc = {:?}, base_lty = {:?}",
Expand All @@ -1169,9 +1173,11 @@ impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> {
base_lty.label,
)
}
RvalueDesc::AddrOfLocal { local, proj } => {
(self.type_of(local), proj, self.addr_of_local[local])
}
RvalueDesc::AddrOfLocal {
local,
proj,
mutbl: _,
} => (self.type_of(local), proj, self.addr_of_local[local]),
};

let mut pointee_lty = pointee_lty;
Expand Down
6 changes: 3 additions & 3 deletions c2rust-analyze/src/dataflow/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::context::{AnalysisCtxt, Assignment, FlagSet, PermissionSet, PointerId
use crate::pointee_type::PointeeTypes;
use crate::pointer_id::{GlobalPointerTable, PointerTable};
use crate::recent_writes::RecentWrites;
use log::debug;
use log::{debug, trace};
use rustc_middle::mir::Body;

mod type_check;
Expand Down Expand Up @@ -71,9 +71,9 @@ impl DataflowConstraints {
for c in &self.constraints {
debug!(" {:?}", c);
}
debug!("hypothesis:");
trace!("hypothesis:");
for (id, p) in hypothesis.iter() {
debug!(" {}: {:?}", id, p);
trace!(" {}: {:?}", id, p);
}

struct PropagatePerms;
Expand Down
14 changes: 11 additions & 3 deletions c2rust-analyze/src/dataflow/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,17 @@ impl<'tcx> TypeChecker<'tcx, '_> {

if let Some(desc) = rv_desc {
match desc {
RvalueDesc::Project { base, proj: _ } => {
// TODO: mutability should probably depend on mutability of the output ref/ptr
self.visit_place_ref(base, Mutability::Not);
RvalueDesc::Project {
base,
proj: _,
mutbl,
} => {
self.visit_place_ref(base, mutbl);
let base_ptr = self.acx.type_of(base).label;
// Note that even non-ptr copy operations are treated as no-op `Project`s.
if !base_ptr.is_none() {
self.record_access(base_ptr, mutbl);
}
}
RvalueDesc::AddrOfLocal { .. } => {}
}
Expand Down
11 changes: 11 additions & 0 deletions c2rust-analyze/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ struct Args {
#[clap(long)]
use_manual_shims: bool,

/// Add "start/end of def" annotations as comments around each definition. These annotations
/// are used by `scripts/extract_working_defs.py` to locate specific defs in the rewritten
/// code.
#[clap(long)]
annotate_def_spans: bool,

/// Read a list of defs that should be marked non-rewritable (`FIXED`) from this file path.
/// Run `c2rust-analyze` without this option and check the debug output for a full list of defs
/// in the crate being analyzed; the file passed to this option should list a subset of those
Expand Down Expand Up @@ -401,6 +407,7 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> {
mut rewrite_mode,
rewrite_in_place,
use_manual_shims,
annotate_def_spans,
fixed_defs_list,
force_rewrite_defs_list,
skip_pointee_defs_list,
Expand Down Expand Up @@ -478,6 +485,10 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> {
cmd.env("C2RUST_ANALYZE_USE_MANUAL_SHIMS", "1");
}

if annotate_def_spans {
cmd.env("C2RUST_ANALYZE_ANNOTATE_DEF_SPANS", "1");
}

Ok(())
})?;

Expand Down
5 changes: 4 additions & 1 deletion c2rust-analyze/src/pointee_type/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,10 @@ impl<'tcx> TypeChecker<'tcx, '_> {
pub fn visit_rvalue(&mut self, rv: &Rvalue<'tcx>, lty: LTy<'tcx>) {
trace!("visit_rvalue({rv:?}, {lty:?})");

if let Some(RvalueDesc::Project { base, proj: &[] }) = describe_rvalue(rv) {
if let Some(RvalueDesc::Project {
base, proj: &[], ..
}) = describe_rvalue(rv)
{
// Special case for no-op projections like `&*p`. Since the pointer is passed through
// unchanged, we don't require the pointee type to actually match the type used for the
// paired deref and address-of operations.
Expand Down
14 changes: 14 additions & 0 deletions c2rust-analyze/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,17 @@ pub enum RvalueDesc<'tcx> {
base: PlaceRef<'tcx>,
/// The projection applied to the pointer. This contains no `Deref` projections.
proj: &'tcx [PlaceElem<'tcx>],
/// Mutability of the resulting reference.
mutbl: Mutability,
},
/// The address of a local or one of its fields, such as `&x.y`. The rvalue is split into a
/// base local (in this case `x`) and a projection (`.y`). The `&` is implicit.
AddrOfLocal {
local: Local,
/// The projection applied to the local. This contains no `Deref` projections.
proj: &'tcx [PlaceElem<'tcx>],
/// Mutability of the resulting reference.
mutbl: Mutability,
},
}

Expand All @@ -46,10 +50,18 @@ pub fn describe_rvalue<'tcx>(rv: &Rvalue<'tcx>) -> Option<RvalueDesc<'tcx>> {
Operand::Move(pl) | Operand::Copy(pl) => RvalueDesc::Project {
base: pl.as_ref(),
proj: &[],
// This is an rvalue of an `Assign` statement, so it's always in a non-mutable
// position.
mutbl: Mutability::Not,
},
Operand::Constant(_) => return None,
},
Rvalue::Ref(_, _, pl) | Rvalue::AddressOf(_, pl) => {
let mutbl = match *rv {
Rvalue::Ref(_, kind, _) => kind.to_mutbl_lossy(),
Rvalue::AddressOf(mutbl, _) => mutbl,
_ => unreachable!(),
};
let projection = &pl.projection[..];
match projection
.iter()
Expand All @@ -63,13 +75,15 @@ pub fn describe_rvalue<'tcx>(rv: &Rvalue<'tcx>) -> Option<RvalueDesc<'tcx>> {
projection: &projection[..i],
},
proj: &projection[i + 1..],
mutbl,
}
}
None => {
// `pl` refers to a field/element of a local.
RvalueDesc::AddrOfLocal {
local: pl.local,
proj: projection,
mutbl,
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion c2rust-analyze/tests/filecheck/non_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn g(cond: bool) {
// CHECK-LABEL: final labeling for "h"
fn h(cond: bool) {
let x = 1_i32;
// CHECK: ([[@LINE+1]]: y): {{.*}}, type = UNIQUE | NON_NULL | STACK#
// CHECK: ([[@LINE+1]]: y): {{.*}}, type = READ | UNIQUE | NON_NULL | STACK#
let y = ptr::addr_of!(x);
// CHECK: ([[@LINE+1]]: z): {{.*}}, type = UNIQUE | STACK#
let z = if cond {
Expand Down
Loading

0 comments on commit 1a418d6

Please sign in to comment.