From 4bef99f6c298364ba00e955b878b3473bca92fc1 Mon Sep 17 00:00:00 2001 From: mmynk Date: Mon, 13 Nov 2023 13:11:43 -0800 Subject: [PATCH] Handle network stats parsing gracefully (#8212) Summary: Goal: Error in recording one of the network stats should not result in completely empty network stats. A process died and recording `snpm6` stats failed for that process which resulted in none of network stats being recorded. Error: ``` Sep 26 21:11:49 ip-10-66-6-18 below[21496]: Sep 27 04:11:49.866 ERRO Os { code: 2, kind: NotFound, message: "No such file or directory" }: "/proc/21496/net/snmp6" ``` Pull Request resolved: /~https://github.com/facebookincubator/below/pull/8212 Reviewed By: dschatzberg Differential Revision: D51032506 Pulled By: brianc118 fbshipit-source-id: 9b9850dd6f41bbe1f4578872b95c2ff2f14f64be --- Cargo.lock | 2 ++ below/model/src/collector.rs | 2 +- below/procfs/Cargo.toml | 2 ++ below/procfs/src/lib.rs | 57 ++++++++++++++++++++++++++---------- below/procfs/src/test.rs | 40 ++++++++++++++++++++++++- 5 files changed, 86 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d01b333a..9eea4200 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -887,6 +887,8 @@ dependencies = [ "nix 0.25.0", "openat", "serde", + "slog", + "slog-term", "tempfile", "thiserror", "threadpool", diff --git a/below/model/src/collector.rs b/below/model/src/collector.rs index 419bfa8b..08645d30 100644 --- a/below/model/src/collector.rs +++ b/below/model/src/collector.rs @@ -200,7 +200,7 @@ fn collect_sample(logger: &slog::Logger, options: &CollectorOptions) -> Result ns.into(), Err(e) => { error!(logger, "{:#}", e); diff --git a/below/procfs/Cargo.toml b/below/procfs/Cargo.toml index 82346701..78df9266 100644 --- a/below/procfs/Cargo.toml +++ b/below/procfs/Cargo.toml @@ -16,8 +16,10 @@ libc = "0.2.139" nix = "0.25" openat = "0.1.21" serde = { version = "1.0.185", features = ["derive", "rc"] } +slog = { version = "2.7", features = ["max_level_trace", "nested-values"] } thiserror = "1.0.43" threadpool = "1.8.1" [dev-dependencies] +slog-term = "2.8" tempfile = "3.8" diff --git a/below/procfs/src/lib.rs b/below/procfs/src/lib.rs index bf43a30a..c3f0afaa 100644 --- a/below/procfs/src/lib.rs +++ b/below/procfs/src/lib.rs @@ -29,6 +29,7 @@ use std::time::Duration; use lazy_static::lazy_static; use nix::sys; use openat::Dir; +use slog::debug; use thiserror::Error; use threadpool::ThreadPool; @@ -908,16 +909,18 @@ macro_rules! get_val_from_stats_map { } pub struct NetReader { + logger: slog::Logger, interface_dir: Dir, proc_net_dir: Dir, } impl NetReader { - pub fn new() -> Result { - Self::new_with_custom_path(NET_SYSFS.into(), NET_PROCFS.into()) + pub fn new(logger: slog::Logger) -> Result { + Self::new_with_custom_path(logger, NET_SYSFS.into(), NET_PROCFS.into()) } pub fn new_with_custom_path( + logger: slog::Logger, interface_path: PathBuf, proc_net_path: PathBuf, ) -> Result { @@ -927,6 +930,7 @@ impl NetReader { Dir::open(&proc_net_path).map_err(|e| Error::IoError(proc_net_path, e))?; Ok(NetReader { + logger, interface_dir, proc_net_dir, }) @@ -1305,21 +1309,44 @@ impl NetReader { } pub fn read_netstat(&self) -> Result { - let netstat_map = self.read_kv_diff_line("netstat")?; - let snmp_map = self.read_kv_diff_line("snmp")?; - let snmp6_map = self.read_kv_same_line("snmp6")?; + // Any of these files could be missing, however unlikely. + // An interface file could be missing if it is deleted while reading the directory. + // For example, if ipv6 is disabled, /proc/net/snmp6 won't exist. + // Similarly, one could even disable ipv4, in which case /proc/net/snmp won't exist. + // Thus, we handle ENOENT errors by setting corresponding fields to `None`. + let netstat_map = handle_enoent(&self.logger, self.read_kv_diff_line("netstat"))?; + let snmp_map = handle_enoent(&self.logger, self.read_kv_diff_line("snmp"))?; + let snmp6_map = handle_enoent(&self.logger, self.read_kv_same_line("snmp6"))?; + let iface_map = handle_enoent(&self.logger, self.read_net_map())?; Ok(NetStat { - interfaces: Some(self.read_net_map()?), - tcp: Some(Self::read_tcp_stat(&snmp_map)), - tcp_ext: Some(Self::read_tcp_ext_stat(&netstat_map)), - ip: Some(Self::read_ip_stat(&snmp_map)), - ip_ext: Some(Self::read_ip_ext_stat(&netstat_map)), - ip6: Some(Self::read_ip6_stat(&snmp6_map)), - icmp: Some(Self::read_icmp_stat(&snmp_map)), - icmp6: Some(Self::read_icmp6_stat(&snmp6_map)), - udp: Some(Self::read_udp_stat(&snmp_map)), - udp6: Some(Self::read_udp6_stat(&snmp6_map)), + interfaces: iface_map, + tcp: snmp_map.as_ref().map(Self::read_tcp_stat), + tcp_ext: netstat_map.as_ref().map(Self::read_tcp_ext_stat), + ip: snmp_map.as_ref().map(Self::read_ip_stat), + ip_ext: netstat_map.as_ref().map(Self::read_ip_ext_stat), + ip6: snmp6_map.as_ref().map(Self::read_ip6_stat), + icmp: snmp_map.as_ref().map(Self::read_icmp_stat), + icmp6: snmp6_map.as_ref().map(Self::read_icmp6_stat), + udp: snmp_map.as_ref().map(Self::read_udp_stat), + udp6: snmp6_map.as_ref().map(Self::read_udp6_stat), }) } } + +/// Wraps the result into an `Option` if the result is not an error. +/// If the error is of type `ENOENT`, it is returned as `Ok(None)`. +/// Else, the error itself is returned. +fn handle_enoent( + logger: &slog::Logger, + result: Result>, +) -> Result>> { + match result { + Ok(map) => Ok(Some(map)), + Err(Error::IoError(_, err)) if err.kind() == ErrorKind::NotFound => { + debug!(logger, "{:?}", err); + Ok(None) + } + Err(err) => Err(err), + } +} diff --git a/below/procfs/src/test.rs b/below/procfs/src/test.rs index 04112ee4..eac2e2c7 100644 --- a/below/procfs/src/test.rs +++ b/below/procfs/src/test.rs @@ -17,6 +17,7 @@ use std::io::Write; use std::os::unix::fs::symlink; use std::path::Path; +use slog::Drain; use tempfile::TempDir; use crate::types::*; @@ -80,11 +81,12 @@ impl TestProcfs { } fn get_net_reader(&self) -> NetReader { + let logger = get_logger(); let iface_dir = self.path().join("iface"); if !iface_dir.exists() { std::fs::create_dir(&iface_dir).expect("Failed to create iface dir"); } - NetReader::new_with_custom_path(iface_dir, self.path().to_path_buf()) + NetReader::new_with_custom_path(logger, iface_dir, self.path().to_path_buf()) .expect("Fail to construct Net Reader") } @@ -122,6 +124,11 @@ impl TestProcfs { } } +fn get_logger() -> slog::Logger { + let plain = slog_term::PlainSyncDecorator::new(std::io::stderr()); + slog::Logger::root(slog_term::FullFormat::new(plain).build().fuse(), slog::o!()) +} + #[test] fn test_kernel_version() { let version = b"1.2.3"; @@ -1190,6 +1197,37 @@ fn test_read_net_stat() { verify_interfaces(&netstat); } +#[test] +fn test_read_enoent() { + let netsysfs = TestProcfs::new(); + write_net_map(&netsysfs); + + let netstat = netsysfs + .get_net_reader() + .read_netstat() + .expect("Fail to get NetStat"); + verify_interfaces(&netstat); + assert_eq!(netstat.tcp, None); + assert_eq!(netstat.tcp_ext, None); + assert_eq!(netstat.ip, None); + assert_eq!(netstat.ip_ext, None); + assert_eq!(netstat.ip6, None); + assert_eq!(netstat.icmp, None); + assert_eq!(netstat.icmp6, None); + assert_eq!(netstat.udp, None); + assert_eq!(netstat.udp6, None); +} + +#[test] +fn test_read_bad_file() { + let netsysfs = TestProcfs::new(); + write_net_map(&netsysfs); + netsysfs.create_file_with_content("snmp", b"bad\nfile"); + + let err = netsysfs.get_net_reader().read_netstat().unwrap_err(); + assert!(matches!(err, crate::Error::InvalidFileFormat(_))); +} + fn verify_tcp(netstat: &NetStat) { let tcp = netstat.tcp.as_ref().expect("Fail to collect tcp stats"); assert_eq!(tcp.active_opens, Some(54_858_563));