Skip to content

Commit

Permalink
Avoid potential overflows (#1138)
Browse files Browse the repository at this point in the history
### What does this PR do?

There is a bug in lading where gauge values for smaps_rollup, smaps
come through in the EiB range. The changes here are not a fix but they
don't hurt either and they're a good idea.
  • Loading branch information
blt authored Dec 11, 2024
1 parent dce08ac commit b2d5858
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 18 deletions.
28 changes: 15 additions & 13 deletions lading/src/observer/linux/procfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,25 +80,26 @@ impl Sampler {
#[allow(
clippy::similar_names,
clippy::too_many_lines,
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
clippy::cast_possible_truncation,
clippy::cast_possible_wrap
)]
pub(crate) async fn poll(&mut self) -> Result<(), Error> {
// Key for this map is (pid, basename/exe, cmdline)
let mut samples: FxHashMap<ProcessIdentifier, Sample> = FxHashMap::default();

let mut total_processes: u64 = 0;
// We maintain a tally of the total RSS and PSS consumed by the parent
// process and its children.
let mut aggr = memory::smaps_rollup::Aggregator::default();

// Calculate the ticks since machine uptime. This will be important
// later for calculating per-process uptime. Because we capture this one
// we will be slightly out of date with each subsequent iteration of the
// loop. We do not believe this to be an issue.
let uptime_seconds: f64 = procfs::Uptime::current()?.uptime; // seconds since boot
let uptime_ticks: u64 = uptime_seconds.round() as u64 * self.ticks_per_second; // CPU-ticks since boot
let uptime_ticks: u64 =
(uptime_seconds.round() as u64).saturating_mul(self.ticks_per_second); // CPU-ticks since boot

// A tally of the total RSS and PSS consumed by the parent process and
// its children.
let mut aggr = memory::smaps_rollup::Aggregator::default();

// Every sample run we collect all the child processes rooted at the
// parent. As noted by the procfs documentation is this done by
Expand Down Expand Up @@ -270,7 +271,7 @@ impl Sampler {
// Number of threads this process has active.
gauge!("num_threads", &labels).set(stats.num_threads as f64);

total_processes += 1;
total_processes = total_processes.saturating_add(1);

// Also report memory data from `proc/status` as a reference point
report_status_field!(status, labels, vmrss);
Expand All @@ -282,10 +283,11 @@ impl Sampler {
report_status_field!(status, labels, vmexe);
report_status_field!(status, labels, vmlib);

// smaps and smaps_rollup are both governed by the same permission restrictions
// as /proc/[pid]/maps. Per man 5 proc:
// Permission to access this file is governed by a ptrace access mode PTRACE_MODE_READ_FSCREDS check; see ptrace(2).
// If a previous call to process.status() failed due to a lack of ptrace permissions,
// smaps and smaps_rollup are both governed by the same permission
// restrictions as /proc/[pid]/maps. Per man 5 proc: Permission to
// access this file is governed by a ptrace access mode
// PTRACE_MODE_READ_FSCREDS check; see ptrace(2). If a previous call
// to process.status() failed due to a lack of ptrace permissions,
// then we assume smaps operations will fail as well.
if has_ptrace_perm {
// `/proc/{pid}/smaps`
Expand Down Expand Up @@ -406,8 +408,8 @@ impl Sampler {
} = key;

Sample {
utime: acc.utime + sample.utime,
stime: acc.stime + sample.stime,
utime: acc.utime.saturating_add(sample.utime),
stime: acc.stime.saturating_add(sample.stime),
// use parent process uptime
uptime: if *pid == self.parent.pid() {
sample.uptime
Expand Down
10 changes: 5 additions & 5 deletions lading/src/observer/linux/procfs/memory/smaps_rollup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ pub(crate) async fn poll(
continue;
};

let value_kb = {
let value_bytes = {
let value_token = next_token(line, &mut chars).ok_or(Error::Parsing(format!(
"Could not parse numeric value from line: {line}"
)))?;
Expand All @@ -55,7 +55,7 @@ pub(crate) async fn poll(
let numeric = value_token.parse::<u64>()?;

match unit {
"kB" => Ok(numeric * BYTES_PER_KIBIBYTE),
"kB" => Ok(numeric.saturating_mul(BYTES_PER_KIBIBYTE)),
unknown => Err(Error::Parsing(format!(
"Unknown unit: {unknown} in line: {line}"
))),
Expand All @@ -66,12 +66,12 @@ pub(crate) async fn poll(
// Last character is a :, skip it.
let field = name[..name_len - 1].to_snake_case();
match field.as_str() {
"rss" => aggr.rss += value_kb,
"pss" => aggr.pss += value_kb,
"rss" => aggr.rss = aggr.rss.saturating_add(value_bytes),
"pss" => aggr.pss = aggr.pss.saturating_add(value_bytes),
_ => { /* ignore other fields */ }
}
let metric_name = format!("smaps_rollup.{field}");
gauge!(metric_name, labels).set(value_kb as f64);
gauge!(metric_name, labels).set(value_bytes as f64);
}

Ok(())
Expand Down

0 comments on commit b2d5858

Please sign in to comment.