Skip to content

Commit

Permalink
DLPX-82032 zcache panic with 'attempt to subtract with overflow' (ope…
Browse files Browse the repository at this point in the history
  • Loading branch information
Don Brady authored Jul 27, 2022
1 parent b9c43d3 commit f0e4ff6
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 31 deletions.
57 changes: 33 additions & 24 deletions cmd/zfs_object_agent/util/src/zettacache_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ impl Sub<Self> for &StatCount {
type Output = StatCount;

fn sub(self, other: Self) -> Self::Output {
StatCount(AtomicU64::new(self.0.load(Relaxed) - other.0.load(Relaxed)))
StatCount(AtomicU64::new(
self.0.load(Relaxed).saturating_sub(other.0.load(Relaxed)),
))
}
}

Expand Down Expand Up @@ -144,7 +146,9 @@ impl Sub<Self> for &StatBytes {
type Output = StatBytes;

fn sub(self, other: Self) -> Self::Output {
StatBytes(AtomicU64::new(self.0.load(Relaxed) - other.0.load(Relaxed)))
StatBytes(AtomicU64::new(
self.0.load(Relaxed).saturating_sub(other.0.load(Relaxed)),
))
}
}

Expand Down Expand Up @@ -265,13 +269,15 @@ impl Sub<Self> for &IoStatValues {
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DiskIoStats {
pub name: String,
pub instance: u64, // unique instance for this disk
pub stats: EnumMap<DiskIoType, IoStatValues>,
}

impl DiskIoStats {
pub fn new(name: String) -> DiskIoStats {
DiskIoStats {
name,
instance: rand::random(),
stats: Default::default(),
}
}
Expand Down Expand Up @@ -320,38 +326,41 @@ pub struct IoStatsRef<'a> {
pub disk_stats: Vec<&'a DiskIoStats>,
}

impl Sub<Self> for &IoStats {
type Output = IoStats;
impl IoStats {
pub fn max_name_len(&self) -> usize {
self.disk_stats
.iter()
.map(|stats| stats.name.len())
.max()
.unwrap_or_default()
}

/// Subtract two IoStats. Used to create the net values between two IoStats samples.
fn sub(self, other: Self) -> IoStats {
/// Subtract two IoStats to create the net values between two samples.
/// Returns None if the two stats cannot be compared (agent restarted or a disk changed).
pub fn checked_sub(&self, other: &IoStats) -> Option<Self> {
if other.disk_stats.is_empty() {
return self.clone();
return Some(self.clone());
}
// Avoid comparing stats when the agent was restarted or disks were changed
if self.cache_runtime_id != other.cache_runtime_id
|| self.disk_stats.len() != other.disk_stats.len()
{
return None;
}
assert_eq!(self.cache_runtime_id, other.cache_runtime_id);

let mut difference = IoStats {
timestamp: self.timestamp - other.timestamp,
..Default::default()
};

assert_eq!(self.disk_stats.len(), other.disk_stats.len());
for (self_stat, other_stat) in self.disk_stats.iter().zip(other.disk_stats.iter()) {
assert_eq!(self_stat.name, other_stat.name);
// Avoid comparing stats when disks have changed
if self_stat.instance != other_stat.instance {
return None;
}
// We're now confident that stats are comparable so the underlying stat subtraction
// (i.e. StatCount or StatBytes) is a saturating_sub()
difference.disk_stats.push(self_stat - other_stat);
}

difference
}
}

impl IoStats {
pub fn max_name_len(&self) -> usize {
self.disk_stats
.iter()
.map(|stats| stats.name.len())
.max()
.unwrap_or_default()
Some(difference)
}
}

Expand Down
21 changes: 14 additions & 7 deletions cmd/zfs_object_agent/zcache/src/iostat.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
//! The iostat subcommand for zcache.
use std::cmp::max;
use std::collections::hash_map::DefaultHasher;
use std::hash::Hash;
use std::hash::Hasher;
use std::sync::atomic::Ordering::Relaxed;
use std::thread::sleep;
use std::time::Duration;
Expand Down Expand Up @@ -335,19 +338,15 @@ impl IoStatDisplay {

debug!("{latest:?}");

if previous.disk_stats.is_empty()
|| latest.cache_runtime_id == previous.cache_runtime_id
{
let delta = &latest - &previous;

if let Some(delta) = latest.checked_sub(&previous) {
match &self.histogram_name {
None => self.display_iostat_default(iteration, &delta),
Some(name) => self.display_iostat_histogram(name, &delta),
}
// Flush stdout in case output is redirected to a file
flush_stdout()?;
} else {
info!("object agent restarted");
info!("object agent restarted or disks changed");
}

iteration += 1;
Expand All @@ -371,11 +370,19 @@ impl IoStatDisplay {
}

fn insert_summary_disk(disk_stats: &mut IoStats) {
let mut summary = DiskIoStats::new("summary".to_string());
let mut summary = DiskIoStats {
name: "summary".to_string(),
instance: 0,
stats: Default::default(),
};
let mut hasher = DefaultHasher::new();

for disk in &disk_stats.disk_stats {
summary += disk;
disk.instance.hash(&mut hasher);
}
// Since the summay disk is compared first, make its instance depend on every disk
summary.instance = hasher.finish();
disk_stats.disk_stats.insert(0, summary);
}
}
Expand Down

0 comments on commit f0e4ff6

Please sign in to comment.