diff --git a/Cargo.lock b/Cargo.lock index dc94e6f0b..cc99db202 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -143,6 +143,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", + "crabgrind", "env_logger", "flate2", "libc", @@ -259,6 +260,9 @@ name = "cc" version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +dependencies = [ + "jobserver", +] [[package]] name = "cfg-if" @@ -429,6 +433,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crabgrind" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e174279a2a6598ba9a1fec494ebb4172916b95e69d7d2396598c6a7b850d048" +dependencies = [ + "cc", +] + [[package]] name = "crc32fast" version = "1.3.2" diff --git a/collector/Cargo.toml b/collector/Cargo.toml index 327e81fd6..00d8da4fd 100644 --- a/collector/Cargo.toml +++ b/collector/Cargo.toml @@ -43,3 +43,9 @@ benchlib = { path = "benchlib" } [target.'cfg(windows)'.dependencies] miow = "0.3" windows-sys = { version = "0.36.1", features = ["Win32_Foundation"] } + +[features] +# Enable more precise Cachegrind profiles for runtime benchmarks. +# Requires a recent Valgrind to be installed. +# Pass DEP_VALGRIND=<path-to-valgrind-build>/include environment variable when building. +precise-cachegrind = [] diff --git a/collector/README.md b/collector/README.md index 45f2b3e50..488517d5a 100644 --- a/collector/README.md +++ b/collector/README.md @@ -488,7 +488,15 @@ It is also possible to profile runtime benchmarks using the following command: ``` Currently, a `<profiler>` can be `cachegrind`, which will run the runtime benchmark under -`Cachegrind`. +`Cachegrind`. If you pass `--features precise-cachegrind`, you can get more precise profiling results. +In this mode, Cachegrind will only record the instructions of the actual benchmark, and ignore any +other code (e.g. benchmark initialization). 
To use this mode, you need to provide a path to a Valgrind +build directory (at least Valgrind 3.22 is required), like this: + +``` +DEP_VALGRIND=<path-to-valgrind-build>/include cargo run --release --bin collector \ + --features precise-cachegrind profile_runtime cachegrind +``` ## Codegen diff You can use the `codegen_diff` command to display the assembly, LLVM IR or MIR difference between two diff --git a/collector/benchlib/Cargo.toml b/collector/benchlib/Cargo.toml index a4d351a27..ae9b8e85d 100644 --- a/collector/benchlib/Cargo.toml +++ b/collector/benchlib/Cargo.toml @@ -15,9 +15,11 @@ env_logger = "0.10.0" clap = { version = "4.1", features = ["derive", "string"] } libc = "0.2" flate2 = { version = "1", optional = true } +crabgrind = { version = "0.1.10", optional = true } [target.'cfg(target_os = "linux")'.dependencies] perf-event = "0.4.7" [features] compression = ["dep:flate2"] +precise-cachegrind = ["dep:crabgrind"] diff --git a/collector/benchlib/src/profile.rs b/collector/benchlib/src/profile.rs index 354ea5b64..056ed939b 100644 --- a/collector/benchlib/src/profile.rs +++ b/collector/benchlib/src/profile.rs @@ -1,4 +1,17 @@ pub fn profile_function<F: Fn() -> Bench, R, Bench: FnOnce() -> R>(benchmark_constructor: &F) { let func = benchmark_constructor(); + + // With the `precise-cachegrind` feature, we want to enable cachegrind recording + // only for the actual execution of the profiled function. 
+ #[cfg(feature = "precise-cachegrind")] + { + crabgrind::cachegrind::start_instrumentation(); + } + func(); + + #[cfg(feature = "precise-cachegrind")] + { + crabgrind::cachegrind::stop_instrumentation(); + } } diff --git a/collector/src/runtime/benchmark.rs b/collector/src/runtime/benchmark.rs index 3803811ed..57f6a882b 100644 --- a/collector/src/runtime/benchmark.rs +++ b/collector/src/runtime/benchmark.rs @@ -135,7 +135,16 @@ pub struct BenchmarkSuiteCompilation { impl BenchmarkSuiteCompilation { pub fn extract_suite(self) -> BenchmarkSuite { - assert!(self.failed_to_compile.is_empty()); + use std::fmt::Write; + + if !self.failed_to_compile.is_empty() { + let mut message = + "Cannot extract runtime suite because of compilation errors:\n".to_string(); + for (group, error) in self.failed_to_compile { + writeln!(message, "{group}\n{error}\n").unwrap(); + } + panic!("{message}"); + } self.suite } } @@ -358,6 +367,10 @@ fn start_cargo_build( command.arg(target_dir); } + // Enable the precise-cachegrind feature for the benchlib dependency of the runtime group. + #[cfg(feature = "precise-cachegrind")] + command.arg("--features").arg("benchlib/precise-cachegrind"); + let child = command .spawn() .map_err(|error| anyhow::anyhow!("Failed to start cargo: {:?}", error))?; diff --git a/collector/src/runtime/profile.rs b/collector/src/runtime/profile.rs index b18de26f1..4bdd02c62 100644 --- a/collector/src/runtime/profile.rs +++ b/collector/src/runtime/profile.rs @@ -39,6 +39,13 @@ pub fn profile_runtime( .arg("--branch-sim=no") .arg("--cache-sim=no") .arg(format!("--cachegrind-out-file={}", cgout_tmp.display())); + + // Disable cachegrind profile collection at start. + // It will be enabled only for the profiled function using + // Valgrind client requests (see `benchlib/src/profile.rs`). 
+ #[cfg(feature = "precise-cachegrind")] + cmd.arg("--instr-at-start=no"); + cmd.stdin(Stdio::null()); cmd.arg(&group.binary).arg("profile").arg(benchmark); command_output(&mut cmd).context("Cannot run profiler")?;