Skip to content

Commit

Permalink
Handle network stats parsing gracefully (#8212)
Browse files Browse the repository at this point in the history
Summary:
Goal: Error in recording one of the network stats should not result in completely empty network stats.

A process died and recording `snpm6` stats failed for that process which resulted in none of network stats being recorded. Error:
```
Sep 26 21:11:49 ip-10-66-6-18 below[21496]: Sep 27 04:11:49.866 ERRO Os { code: 2, kind: NotFound, message: "No such file or directory" }: "/proc/21496/net/snmp6"
```

Pull Request resolved: #8212

Reviewed By: dschatzberg

Differential Revision: D51032506

Pulled By: brianc118

fbshipit-source-id: 9b9850dd6f41bbe1f4578872b95c2ff2f14f64be
  • Loading branch information
mmynk authored and facebook-github-bot committed Nov 13, 2023
1 parent 2e26427 commit 4bef99f
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 17 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion below/model/src/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ fn collect_sample(logger: &slog::Logger, options: &CollectorOptions) -> Result<S
.collect(),
exit_pidmap,
),
netstats: match procfs::NetReader::new().and_then(|v| v.read_netstat()) {
netstats: match procfs::NetReader::new(logger.clone()).and_then(|v| v.read_netstat()) {
Ok(ns) => ns.into(),
Err(e) => {
error!(logger, "{:#}", e);
Expand Down
2 changes: 2 additions & 0 deletions below/procfs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ libc = "0.2.139"
nix = "0.25"
openat = "0.1.21"
serde = { version = "1.0.185", features = ["derive", "rc"] }
slog = { version = "2.7", features = ["max_level_trace", "nested-values"] }
thiserror = "1.0.43"
threadpool = "1.8.1"

[dev-dependencies]
slog-term = "2.8"
tempfile = "3.8"
57 changes: 42 additions & 15 deletions below/procfs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ use std::time::Duration;
use lazy_static::lazy_static;
use nix::sys;
use openat::Dir;
use slog::debug;
use thiserror::Error;
use threadpool::ThreadPool;

Expand Down Expand Up @@ -908,16 +909,18 @@ macro_rules! get_val_from_stats_map {
}

pub struct NetReader {
logger: slog::Logger,
interface_dir: Dir,
proc_net_dir: Dir,
}

impl NetReader {
pub fn new() -> Result<NetReader> {
Self::new_with_custom_path(NET_SYSFS.into(), NET_PROCFS.into())
pub fn new(logger: slog::Logger) -> Result<NetReader> {
Self::new_with_custom_path(logger, NET_SYSFS.into(), NET_PROCFS.into())
}

pub fn new_with_custom_path(
logger: slog::Logger,
interface_path: PathBuf,
proc_net_path: PathBuf,
) -> Result<NetReader> {
Expand All @@ -927,6 +930,7 @@ impl NetReader {
Dir::open(&proc_net_path).map_err(|e| Error::IoError(proc_net_path, e))?;

Ok(NetReader {
logger,
interface_dir,
proc_net_dir,
})
Expand Down Expand Up @@ -1305,21 +1309,44 @@ impl NetReader {
}

pub fn read_netstat(&self) -> Result<NetStat> {
let netstat_map = self.read_kv_diff_line("netstat")?;
let snmp_map = self.read_kv_diff_line("snmp")?;
let snmp6_map = self.read_kv_same_line("snmp6")?;
// Any of these files could be missing, however unlikely.
// An interface file could be missing if it is deleted while reading the directory.
// For example, if ipv6 is disabled, /proc/net/snmp6 won't exist.
// Similarly, one could even disable ipv4, in which case /proc/net/snmp won't exist.
// Thus, we handle ENOENT errors by setting corresponding fields to `None`.
let netstat_map = handle_enoent(&self.logger, self.read_kv_diff_line("netstat"))?;
let snmp_map = handle_enoent(&self.logger, self.read_kv_diff_line("snmp"))?;
let snmp6_map = handle_enoent(&self.logger, self.read_kv_same_line("snmp6"))?;
let iface_map = handle_enoent(&self.logger, self.read_net_map())?;

Ok(NetStat {
interfaces: Some(self.read_net_map()?),
tcp: Some(Self::read_tcp_stat(&snmp_map)),
tcp_ext: Some(Self::read_tcp_ext_stat(&netstat_map)),
ip: Some(Self::read_ip_stat(&snmp_map)),
ip_ext: Some(Self::read_ip_ext_stat(&netstat_map)),
ip6: Some(Self::read_ip6_stat(&snmp6_map)),
icmp: Some(Self::read_icmp_stat(&snmp_map)),
icmp6: Some(Self::read_icmp6_stat(&snmp6_map)),
udp: Some(Self::read_udp_stat(&snmp_map)),
udp6: Some(Self::read_udp6_stat(&snmp6_map)),
interfaces: iface_map,
tcp: snmp_map.as_ref().map(Self::read_tcp_stat),
tcp_ext: netstat_map.as_ref().map(Self::read_tcp_ext_stat),
ip: snmp_map.as_ref().map(Self::read_ip_stat),
ip_ext: netstat_map.as_ref().map(Self::read_ip_ext_stat),
ip6: snmp6_map.as_ref().map(Self::read_ip6_stat),
icmp: snmp_map.as_ref().map(Self::read_icmp_stat),
icmp6: snmp6_map.as_ref().map(Self::read_icmp6_stat),
udp: snmp_map.as_ref().map(Self::read_udp_stat),
udp6: snmp6_map.as_ref().map(Self::read_udp6_stat),
})
}
}

/// Wraps the result into an `Option` if the result is not an error.
/// If the error is of type `ENOENT`, it is returned as `Ok(None)`.
/// Else, the error itself is returned.
fn handle_enoent<K, V>(
logger: &slog::Logger,
result: Result<BTreeMap<K, V>>,
) -> Result<Option<BTreeMap<K, V>>> {
match result {
Ok(map) => Ok(Some(map)),
Err(Error::IoError(_, err)) if err.kind() == ErrorKind::NotFound => {
debug!(logger, "{:?}", err);
Ok(None)
}
Err(err) => Err(err),
}
}
40 changes: 39 additions & 1 deletion below/procfs/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use std::io::Write;
use std::os::unix::fs::symlink;
use std::path::Path;

use slog::Drain;
use tempfile::TempDir;

use crate::types::*;
Expand Down Expand Up @@ -80,11 +81,12 @@ impl TestProcfs {
}

fn get_net_reader(&self) -> NetReader {
let logger = get_logger();
let iface_dir = self.path().join("iface");
if !iface_dir.exists() {
std::fs::create_dir(&iface_dir).expect("Failed to create iface dir");
}
NetReader::new_with_custom_path(iface_dir, self.path().to_path_buf())
NetReader::new_with_custom_path(logger, iface_dir, self.path().to_path_buf())
.expect("Fail to construct Net Reader")
}

Expand Down Expand Up @@ -122,6 +124,11 @@ impl TestProcfs {
}
}

fn get_logger() -> slog::Logger {
let plain = slog_term::PlainSyncDecorator::new(std::io::stderr());
slog::Logger::root(slog_term::FullFormat::new(plain).build().fuse(), slog::o!())
}

#[test]
fn test_kernel_version() {
let version = b"1.2.3";
Expand Down Expand Up @@ -1190,6 +1197,37 @@ fn test_read_net_stat() {
verify_interfaces(&netstat);
}

#[test]
fn test_read_enoent() {
let netsysfs = TestProcfs::new();
write_net_map(&netsysfs);

let netstat = netsysfs
.get_net_reader()
.read_netstat()
.expect("Fail to get NetStat");
verify_interfaces(&netstat);
assert_eq!(netstat.tcp, None);
assert_eq!(netstat.tcp_ext, None);
assert_eq!(netstat.ip, None);
assert_eq!(netstat.ip_ext, None);
assert_eq!(netstat.ip6, None);
assert_eq!(netstat.icmp, None);
assert_eq!(netstat.icmp6, None);
assert_eq!(netstat.udp, None);
assert_eq!(netstat.udp6, None);
}

#[test]
fn test_read_bad_file() {
let netsysfs = TestProcfs::new();
write_net_map(&netsysfs);
netsysfs.create_file_with_content("snmp", b"bad\nfile");

let err = netsysfs.get_net_reader().read_netstat().unwrap_err();
assert!(matches!(err, crate::Error::InvalidFileFormat(_)));
}

fn verify_tcp(netstat: &NetStat) {
let tcp = netstat.tcp.as_ref().expect("Fail to collect tcp stats");
assert_eq!(tcp.active_opens, Some(54_858_563));
Expand Down

0 comments on commit 4bef99f

Please sign in to comment.