diff --git a/Cargo.toml b/Cargo.toml index b5d5dfc621..0793ca3b13 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,9 +40,9 @@ mimalloc-sys = { version = "0.1.6", optional = true } mmtk-macros = { version = "0.20.0", path = "macros/" } num_cpus = "1.8" num-traits = "0.2" -pfm = { version = "0.1.0-beta.3", optional = true } -probe = "0.5" +pfm = { version = "0.1.1", optional = true } portable-atomic = "1.4.3" +probe = "0.5" regex = "1.7.0" spin = "0.9.5" static_assertions = "1.1.0" diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index f40857182c..027d3c490c 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -240,7 +240,12 @@ impl WorkerLocalStat { let mut counters: Vec> = vec![Box::new(WorkDuration::new())]; #[cfg(feature = "perf_counter")] for e in &mmtk.options.work_perf_events.events { - counters.push(Box::new(WorkPerfEvent::new(&e.0, e.1, e.2))); + counters.push(Box::new(WorkPerfEvent::new( + &e.0, + e.1, + e.2, + *mmtk.options.perf_exclude_kernel, + ))); } counters } diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index 1e1d8b5770..505105b042 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -162,9 +162,10 @@ mod perf_event { /// 0, -1 measures the calling thread on all CPUs /// -1, 0 measures all threads on CPU 0 /// -1, -1 is invalid - pub fn new(name: &str, pid: pid_t, cpu: c_int) -> WorkPerfEvent { + pub fn new(name: &str, pid: pid_t, cpu: c_int, exclude_kernel: bool) -> WorkPerfEvent { let mut pe = PerfEvent::new(name, false) .unwrap_or_else(|_| panic!("Failed to create perf event {}", name)); + pe.set_exclude_kernel(exclude_kernel as u64); pe.open(pid, cpu) .unwrap_or_else(|_| panic!("Failed to open perf event {}", name)); WorkPerfEvent { diff --git a/src/util/options.rs b/src/util/options.rs index bbe408c00f..3b0bff5249 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -733,6 +733,9 @@ options! 
{ // Measuring perf events for GC and mutators // TODO: Ideally this option should only be included when the features 'perf_counter' are enabled. The current macro does not allow us to do this. phase_perf_events: PerfEventOptions [env_var: true, command_line: true] [|_| cfg!(feature = "perf_counter")] = PerfEventOptions {events: vec![]}, + // Whether to exclude perf events occurring in kernel space. By default we include the kernel. + // Only set this option if you know the implications of excluding the kernel! + perf_exclude_kernel: bool [env_var: true, command_line: true] [|_| cfg!(feature = "perf_counter")] = false, // Set how to bind affinity to the GC Workers. Default thread affinity delegates to the OS // scheduler. If a list of cores are specified, cores are allocated to threads in a round-robin // fashion. The core ids should match the ones reported by /proc/cpuinfo. Core ids are @@ -749,7 +752,7 @@ options! { thread_affinity: AffinityKind [env_var: true, command_line: true] [|v: &AffinityKind| v.validate()] = AffinityKind::OsDefault, // Set the GC trigger. This defines the heap size and how MMTk triggers a GC. // Default to a fixed heap size of 0.5x physical memory. 
- gc_trigger : GCTriggerSelector [env_var: true, command_line: true] [|v: &GCTriggerSelector| v.validate()] = GCTriggerSelector::FixedHeapSize((crate::util::memory::get_system_total_memory() as f64 * 0.5f64) as usize), + gc_trigger: GCTriggerSelector [env_var: true, command_line: true] [|v: &GCTriggerSelector| v.validate()] = GCTriggerSelector::FixedHeapSize((crate::util::memory::get_system_total_memory() as f64 * 0.5f64) as usize), // Enable transparent hugepage support via madvise (only Linux is supported) transparent_hugepages: bool [env_var: true, command_line: true] [|v: &bool| !v || cfg!(target_os = "linux")] = false } diff --git a/src/util/statistics/counter/perf_event.rs b/src/util/statistics/counter/perf_event.rs index a203f41257..3b0d616d26 100644 --- a/src/util/statistics/counter/perf_event.rs +++ b/src/util/statistics/counter/perf_event.rs @@ -9,9 +9,10 @@ pub struct PerfEventDiffable { } impl PerfEventDiffable { - pub fn new(name: &str) -> Self { + pub fn new(name: &str, exclude_kernel: bool) -> Self { let mut pe = PerfEvent::new(name, true) .unwrap_or_else(|_| panic!("Failed to create perf event {}", name)); + pe.set_exclude_kernel(exclude_kernel as u64); // measures the calling thread (and all child threads) on all CPUs pe.open(0, -1) .unwrap_or_else(|_| panic!("Failed to open perf event {}", name)); diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 61a2a05172..6e754b1384 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -91,7 +91,7 @@ impl Stats { shared.clone(), true, false, - PerfEventDiffable::new(&e.0), + PerfEventDiffable::new(&e.0, *options.perf_exclude_kernel), )))); } Stats {