Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

analyze: misc fixes for lighttpd buffer (part 2/2) #1180

Merged
merged 7 commits into from
Dec 9, 2024
1 change: 1 addition & 0 deletions c2rust-analyze/scripts/auto_fix_errors.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env python3
import argparse
from collections import defaultdict, namedtuple
import json
Expand Down
116 changes: 116 additions & 0 deletions c2rust-analyze/scripts/extract_working_defs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/usr/bin/env python3
"""
spernsteiner marked this conversation as resolved.
Show resolved Hide resolved
Script for extracting defs on which rewriting succeeded. Currently, this is
specific to lighttpd_rust_amalgamated, but it could be generalized to work on
other projects as well. The script identifies "working defs" by reading the
log of a `c2rust-analyze` run and collecting defs that have no reported
"analysis failed" errors and are not listed in the `--fixed-defs-list`. It
then locates each working def in the rewritten code by searching for "start/end
of def" comments and prints the code for each working def to stdout.
"""
from dataclasses import dataclass
import re
import subprocess
import sys

ANALYSIS_FAILED_RE = re.compile('analysis of (DefId\([^)]*\)) failed:.*$')
DEF_SPAN_RE = re.compile('(DefId\([^)]*\)) @ (.*)$')
# src/main.rs:422:1: 422:49 (#0)
SPAN_RE = re.compile('([^:]+):([0-9]+):([0-9]+): ([0-9]+):([0-9]+) \(#[0-9]+\)')

START_OF_DEF_RE = re.compile(' *// [0-9]*: [^:]*: start of def (DefId\([^)]*\))$')
END_OF_DEF_RE = re.compile(' *// [0-9]*: [^:]*: end of def (DefId\([^)]*\))$')

HEADER = '''
#![feature(rustc_private)]
#![feature(register_tool)]
#![register_tool(c2rust_analyze_test)]

extern crate libc;
extern crate core;

pub type __uint32_t = libc::c_uint;
pub type __intmax_t = libc::c_long;
pub type __uintmax_t = libc::c_ulong;
pub type size_t = libc::c_ulong;
pub type uint32_t = __uint32_t;
pub type uint_fast32_t = libc::c_ulong;
pub type intmax_t = __intmax_t;
pub type uintmax_t = __uintmax_t;

#[no_mangle]
#[cold]
#[c2rust_analyze_test::fixed_signature]
pub extern "C" fn ck_assert_failed(
) -> ! {
panic!();
}
'''

def main():
fixed_defs_path, log_path = sys.argv[1:]

error_summary = subprocess.run(('grep', '-A999999', '^error summary:$', log_path),
check=True, capture_output=True, text=True).stdout
error_defs = set()
for line in error_summary.splitlines():
line = line.strip()
m = ANALYSIS_FAILED_RE.match(line)
if m is not None:
error_defs.add(m.group(1))
else:
print('bad line %r' % (line,), file=sys.stderr)

unfixed_defs = set()
for line in open(fixed_defs_path):
line = line.strip()
if line.startswith('#'):
line = line[1:].strip()
print('unfixed def: %r' % line, file=sys.stderr)
unfixed_defs.add(line)

rewritten_defs = unfixed_defs - error_defs

f = open('../../lighttpd-rust/lighttpd_rust_amalgamated/src/main.new.rs')
lines = list(f)
start_of_def = {}
end_of_def = {}
last_non_prefix_line = 0
prev_last_non_prefix_line = 0
for i, line in enumerate(lines):
m = START_OF_DEF_RE.match(line)
if m is not None:
start_of_def[m.group(1)] = prev_last_non_prefix_line + 1
m = END_OF_DEF_RE.match(line)
if m is not None:
end_of_def[m.group(1)] = i
line = line.strip()
if not line.startswith('//') and not line.startswith('#'):
prev_last_non_prefix_line = last_non_prefix_line
last_non_prefix_line = i


print(HEADER)

for did in sorted(rewritten_defs):
print('\n// BEGIN %s' % did)
if did not in start_of_def:
continue
start = start_of_def[did]
end = end_of_def[did] + 1
in_prefix_comments = True
for line in lines[start:end]:
if in_prefix_comments:
if not line.strip().startswith('//'):
in_prefix_comments = False
else:
continue
sys.stdout.write(line)
print('// END %s' % did)

print('extracted %d working defs:' % len(rewritten_defs), file=sys.stderr)
for did in sorted(rewritten_defs):
print(did, file=sys.stderr)

if __name__ == '__main__':
main()
14 changes: 10 additions & 4 deletions c2rust-analyze/scripts/pointwise_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,23 @@ def read_func_errors(f):
func_errors[func] = errors
return func_errors

def calc_pct(n, d):
if d == 0:
return 0
else:
return n / d * 100

pointwise_func_errors = read_func_errors(open(pointwise_log_path))
pointwise_ok = set(func for func, errors in pointwise_func_errors.items() if errors == 0)
print('pointwise: %5d/%d functions passed (%.1f%%)' % (
len(pointwise_ok), len(pointwise_func_errors),
len(pointwise_ok) / len(pointwise_func_errors) * 100))
calc_pct(len(pointwise_ok), len(pointwise_func_errors))))

unmodified_func_errors = read_func_errors(open(unmodified_log_path))
unmodified_ok = set(func for func, errors in unmodified_func_errors.items() if errors == 0)
print('unmodified: %5d/%d functions passed (%.1f%%)' % (
len(unmodified_ok), len(unmodified_func_errors),
len(unmodified_ok) / len(unmodified_func_errors) * 100))
calc_pct(len(unmodified_ok), len(unmodified_func_errors))))

assert len(pointwise_func_errors) == len(unmodified_func_errors)
num_total = len(pointwise_func_errors)
Expand All @@ -69,7 +75,7 @@ def read_func_errors(f):

improved = pointwise_ok - unmodified_ok
print('improved: %5d/%d functions (%.1f%%)' % (
len(improved), num_unmodified_bad, len(improved) / num_unmodified_bad * 100))
len(improved), num_unmodified_bad, calc_pct(len(improved), num_unmodified_bad)))
broke = unmodified_ok - pointwise_ok
print('broke: %5d/%d functions (%.1f%%)' % (
len(broke), num_unmodified_ok, len(broke) / num_unmodified_ok * 100))
len(broke), num_unmodified_ok, calc_pct(len(broke), num_unmodified_ok)))
4 changes: 4 additions & 0 deletions c2rust-analyze/scripts/run_pointwise_metrics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ now=$(date +%Y%m%d-%H%M%S)
project="$(basename "$MODULE_DIR")"
case "$project" in
lighttpd_*)
# Make sure the project has been built first. This ensures that the
# `extern` function can find the libraries it needs.
cargo build --manifest-path "$MODULE_DIR/Cargo.toml"

rustc_flags=(
--edition 2021
--crate-type rlib
Expand Down
10 changes: 10 additions & 0 deletions c2rust-analyze/src/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1511,6 +1511,16 @@ fn run2<'tcx>(
info.acx_data.set(acx.into_data());
}

// Annotate begin/end of each def. This is used by extract_working_defs.py to locate defs that
// were rewritten successfully.
if env::var("C2RUST_ANALYZE_ANNOTATE_DEF_SPANS").as_deref() == Ok("1") {
for ldid in tcx.hir_crate_items(()).definitions() {
let span = tcx.source_span(ldid);
ann.emit(span.shrink_to_lo(), format_args!("start of def {ldid:?}"));
ann.emit(span.shrink_to_hi(), format_args!("end of def {ldid:?}"));
}
}

// Print results for `static` items.
debug!("\nfinal labeling for static items:");
let lcx1 = crate::labeled_ty::LabeledTyCtxt::new(tcx);
Expand Down
14 changes: 10 additions & 4 deletions c2rust-analyze/src/borrowck/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,18 @@ pub fn borrowck_mir<'tcx>(
});
let ptr = match stmt.kind {
StatementKind::Assign(ref x) => match describe_rvalue(&x.1) {
Some(RvalueDesc::Project { base, proj: _ }) => acx
Some(RvalueDesc::Project {
base,
proj: _,
mutbl: _,
}) => acx
.ptr_of(base)
.unwrap_or_else(|| panic!("missing pointer ID for {:?}", base)),
Some(RvalueDesc::AddrOfLocal { local, proj: _ }) => {
acx.addr_of_local[local]
}
Some(RvalueDesc::AddrOfLocal {
local,
proj: _,
mutbl: _,
}) => acx.addr_of_local[local],
None => panic!("loan {:?} was issued by unknown rvalue {:?}?", loan, x.1),
},
_ => panic!("loan {:?} was issued by non-assign stmt {:?}?", loan, stmt),
Expand Down
14 changes: 10 additions & 4 deletions c2rust-analyze/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,11 @@ impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> {
let ty = rv.ty(self, self.tcx());
if matches!(ty.kind(), TyKind::Ref(..) | TyKind::RawPtr(..)) {
let (pointee_lty, proj, ptr) = match desc {
RvalueDesc::Project { base, proj } => {
RvalueDesc::Project {
base,
proj,
mutbl: _,
} => {
let base_lty = self.type_of(base);
debug!(
"rvalue = {:?}, desc = {:?}, base_lty = {:?}",
Expand All @@ -1169,9 +1173,11 @@ impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> {
base_lty.label,
)
}
RvalueDesc::AddrOfLocal { local, proj } => {
(self.type_of(local), proj, self.addr_of_local[local])
}
RvalueDesc::AddrOfLocal {
local,
proj,
mutbl: _,
} => (self.type_of(local), proj, self.addr_of_local[local]),
};

let mut pointee_lty = pointee_lty;
Expand Down
6 changes: 3 additions & 3 deletions c2rust-analyze/src/dataflow/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::context::{AnalysisCtxt, Assignment, FlagSet, PermissionSet, PointerId
use crate::pointee_type::PointeeTypes;
use crate::pointer_id::{GlobalPointerTable, PointerTable};
use crate::recent_writes::RecentWrites;
use log::debug;
use log::{debug, trace};
use rustc_middle::mir::Body;

mod type_check;
Expand Down Expand Up @@ -71,9 +71,9 @@ impl DataflowConstraints {
for c in &self.constraints {
debug!(" {:?}", c);
}
debug!("hypothesis:");
trace!("hypothesis:");
for (id, p) in hypothesis.iter() {
debug!(" {}: {:?}", id, p);
trace!(" {}: {:?}", id, p);
}

struct PropagatePerms;
Expand Down
14 changes: 11 additions & 3 deletions c2rust-analyze/src/dataflow/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,9 +217,17 @@ impl<'tcx> TypeChecker<'tcx, '_> {

if let Some(desc) = rv_desc {
match desc {
RvalueDesc::Project { base, proj: _ } => {
// TODO: mutability should probably depend on mutability of the output ref/ptr
self.visit_place_ref(base, Mutability::Not);
RvalueDesc::Project {
base,
proj: _,
mutbl,
} => {
self.visit_place_ref(base, mutbl);
let base_ptr = self.acx.type_of(base).label;
// Note that even non-ptr copy operations are treated as no-op `Project`s.
if !base_ptr.is_none() {
self.record_access(base_ptr, mutbl);
}
}
RvalueDesc::AddrOfLocal { .. } => {}
}
Expand Down
11 changes: 11 additions & 0 deletions c2rust-analyze/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ struct Args {
#[clap(long)]
use_manual_shims: bool,

/// Add "start/end of def" annotations as comments around each definition. These annotations
/// are used by `scripts/extract_working_defs.py` to locate specific defs in the rewritten
/// code.
#[clap(long)]
annotate_def_spans: bool,

/// Read a list of defs that should be marked non-rewritable (`FIXED`) from this file path.
/// Run `c2rust-analyze` without this option and check the debug output for a full list of defs
/// in the crate being analyzed; the file passed to this option should list a subset of those
Expand Down Expand Up @@ -401,6 +407,7 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> {
mut rewrite_mode,
rewrite_in_place,
use_manual_shims,
annotate_def_spans,
fixed_defs_list,
force_rewrite_defs_list,
skip_pointee_defs_list,
Expand Down Expand Up @@ -478,6 +485,10 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> {
cmd.env("C2RUST_ANALYZE_USE_MANUAL_SHIMS", "1");
}

if annotate_def_spans {
cmd.env("C2RUST_ANALYZE_ANNOTATE_DEF_SPANS", "1");
}

Ok(())
})?;

Expand Down
5 changes: 4 additions & 1 deletion c2rust-analyze/src/pointee_type/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,10 @@ impl<'tcx> TypeChecker<'tcx, '_> {
pub fn visit_rvalue(&mut self, rv: &Rvalue<'tcx>, lty: LTy<'tcx>) {
trace!("visit_rvalue({rv:?}, {lty:?})");

if let Some(RvalueDesc::Project { base, proj: &[] }) = describe_rvalue(rv) {
if let Some(RvalueDesc::Project {
base, proj: &[], ..
}) = describe_rvalue(rv)
{
// Special case for no-op projections like `&*p`. Since the pointer is passed through
// unchanged, we don't require the pointee type to actually match the type used for the
// paired deref and address-of operations.
Expand Down
14 changes: 14 additions & 0 deletions c2rust-analyze/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,17 @@ pub enum RvalueDesc<'tcx> {
base: PlaceRef<'tcx>,
/// The projection applied to the pointer. This contains no `Deref` projections.
proj: &'tcx [PlaceElem<'tcx>],
/// Mutability of the resulting reference.
mutbl: Mutability,
},
/// The address of a local or one of its fields, such as `&x.y`. The rvalue is split into a
/// base local (in this case `x`) and a projection (`.y`). The `&` is implicit.
AddrOfLocal {
local: Local,
/// The projection applied to the local. This contains no `Deref` projections.
proj: &'tcx [PlaceElem<'tcx>],
/// Mutability of the resulting reference.
mutbl: Mutability,
},
}

Expand All @@ -46,10 +50,18 @@ pub fn describe_rvalue<'tcx>(rv: &Rvalue<'tcx>) -> Option<RvalueDesc<'tcx>> {
Operand::Move(pl) | Operand::Copy(pl) => RvalueDesc::Project {
base: pl.as_ref(),
proj: &[],
// This is an rvalue of an `Assign` statement, so it's always in a non-mutable
// position.
mutbl: Mutability::Not,
},
Operand::Constant(_) => return None,
},
Rvalue::Ref(_, _, pl) | Rvalue::AddressOf(_, pl) => {
let mutbl = match *rv {
Rvalue::Ref(_, kind, _) => kind.to_mutbl_lossy(),
Rvalue::AddressOf(mutbl, _) => mutbl,
_ => unreachable!(),
};
let projection = &pl.projection[..];
match projection
.iter()
Expand All @@ -63,13 +75,15 @@ pub fn describe_rvalue<'tcx>(rv: &Rvalue<'tcx>) -> Option<RvalueDesc<'tcx>> {
projection: &projection[..i],
},
proj: &projection[i + 1..],
mutbl,
}
}
None => {
// `pl` refers to a field/element of a local.
RvalueDesc::AddrOfLocal {
local: pl.local,
proj: projection,
mutbl,
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion c2rust-analyze/tests/filecheck/non_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn g(cond: bool) {
// CHECK-LABEL: final labeling for "h"
fn h(cond: bool) {
let x = 1_i32;
// CHECK: ([[@LINE+1]]: y): {{.*}}, type = UNIQUE | NON_NULL | STACK#
// CHECK: ([[@LINE+1]]: y): {{.*}}, type = READ | UNIQUE | NON_NULL | STACK#
let y = ptr::addr_of!(x);
// CHECK: ([[@LINE+1]]: z): {{.*}}, type = UNIQUE | STACK#
let z = if cond {
Expand Down
Loading
Loading