Skip to content

Commit

Permalink
Merge pull request #1743 from rust-lang/precise-cachegrind
Browse files Browse the repository at this point in the history
Add precise Cachegrind profiling mode
  • Loading branch information
Kobzol authored Nov 13, 2023
2 parents ce83d3c + c8b1440 commit c19ca6a
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 2 deletions.
13 changes: 13 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions collector/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,9 @@ benchlib = { path = "benchlib" }
[target.'cfg(windows)'.dependencies]
miow = "0.3"
windows-sys = { version = "0.36.1", features = ["Win32_Foundation"] }

[features]
# Enable more precise Cachegrind profiles for runtime benchmarks.
# Requires a recent Valgrind to be installed.
# Set the `DEP_VALGRIND` environment variable to `<path-to-valgrind>/include` when building.
precise-cachegrind = []
10 changes: 9 additions & 1 deletion collector/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,15 @@ It is also possible to profile runtime benchmarks using the following command:
```

Currently, a `<PROFILER>` can be `cachegrind`, which will run the runtime benchmark under
`Cachegrind`.
`Cachegrind`. If you pass `--features precise-cachegrind`, you can get more precise profiling results.
In this mode, Cachegrind will only record the instructions of the actual benchmark and ignore any
other code (e.g. benchmark initialization). To use this mode, set the `DEP_VALGRIND` environment
variable to the `include` directory of a Valgrind installation (Valgrind 3.22 or newer is required), like this:

```
DEP_VALGRIND=<path-to-valgrind-install>/include cargo run --release --bin collector \
--features precise-cachegrind profile_runtime cachegrind <RUSTC> <BENCHMARK_NAME>
```

## Codegen diff
You can use the `codegen_diff` command to display the assembly, LLVM IR or MIR difference between two
Expand Down
2 changes: 2 additions & 0 deletions collector/benchlib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ env_logger = "0.10.0"
clap = { version = "4.1", features = ["derive", "string"] }
libc = "0.2"
flate2 = { version = "1", optional = true }
crabgrind = { version = "0.1.10", optional = true }

[target.'cfg(target_os = "linux")'.dependencies]
perf-event = "0.4.7"

[features]
compression = ["dep:flate2"]
precise-cachegrind = ["dep:crabgrind"]
13 changes: 13 additions & 0 deletions collector/benchlib/src/profile.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,17 @@
/// Builds a benchmark with `benchmark_constructor` and executes it exactly once.
///
/// The value returned by the benchmark is discarded.
///
/// When the `precise-cachegrind` feature is enabled, Cachegrind recording is switched
/// on right before the benchmark body runs and switched off right after it, so that
/// constructing the benchmark is not part of the recorded profile.
pub fn profile_function<F: Fn() -> Bench, R, Bench: FnOnce() -> R>(benchmark_constructor: &F) {
    // Construction is deliberately kept outside of the recorded region.
    let benchmark = benchmark_constructor();

    #[cfg(feature = "precise-cachegrind")]
    crabgrind::cachegrind::start_instrumentation();

    benchmark();

    #[cfg(feature = "precise-cachegrind")]
    crabgrind::cachegrind::stop_instrumentation();
}
15 changes: 14 additions & 1 deletion collector/src/runtime/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,16 @@ pub struct BenchmarkSuiteCompilation {

impl BenchmarkSuiteCompilation {
    /// Consumes the compilation result and returns the compiled benchmark suite.
    ///
    /// # Panics
    ///
    /// Panics if any benchmark group failed to compile. The panic message lists every
    /// failed group together with its compilation error, so that the cause is visible
    /// directly in the panic output.
    pub fn extract_suite(self) -> BenchmarkSuite {
        use std::fmt::Write;

        // No bare `assert!` on `failed_to_compile` here: it would panic before the
        // descriptive message below could be built, hiding the actual errors.
        if !self.failed_to_compile.is_empty() {
            let mut message =
                "Cannot extract runtime suite because of compilation errors:\n".to_string();
            for (group, error) in self.failed_to_compile {
                // Formatting into a `String` does not fail, hence the `unwrap`.
                writeln!(message, "{group}\n{error}\n").unwrap();
            }
            panic!("{message}");
        }
        self.suite
    }
}
Expand Down Expand Up @@ -358,6 +367,10 @@ fn start_cargo_build(
command.arg(target_dir);
}

// Enable the precise-cachegrind feature for the benchlib dependency of the runtime group.
#[cfg(feature = "precise-cachegrind")]
command.arg("--features").arg("benchlib/precise-cachegrind");

let child = command
.spawn()
.map_err(|error| anyhow::anyhow!("Failed to start cargo: {:?}", error))?;
Expand Down
7 changes: 7 additions & 0 deletions collector/src/runtime/profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ pub fn profile_runtime(
.arg("--branch-sim=no")
.arg("--cache-sim=no")
.arg(format!("--cachegrind-out-file={}", cgout_tmp.display()));

// Disable cachegrind profile collection at start.
// It will be enabled only for the profiled function using
// Valgrind client requests (see `benchlib/src/profile.rs`).
#[cfg(feature = "precise-cachegrind")]
cmd.arg("--instr-at-start=no");

cmd.stdin(Stdio::null());
cmd.arg(&group.binary).arg("profile").arg(benchmark);
command_output(&mut cmd).context("Cannot run profiler")?;
Expand Down

0 comments on commit c19ca6a

Please sign in to comment.