From dbf97b698bafd6262f1b12eaf398efe48880f6a6 Mon Sep 17 00:00:00 2001 From: Diwakar Sharma Date: Mon, 9 Dec 2024 13:34:17 +0000 Subject: [PATCH] fix(nexus): don't persist if child faults during nexus create When a nexus is being created with a single child, and that child enters the retire path before the nexus is open, the child gets persisted as unhealthy. This prevents the volume from ever being attached later on. Signed-off-by: Diwakar Sharma --- io-engine/src/bdev/nexus/nexus_persistence.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/io-engine/src/bdev/nexus/nexus_persistence.rs b/io-engine/src/bdev/nexus/nexus_persistence.rs index 7fefa8918..2f3e6837a 100644 --- a/io-engine/src/bdev/nexus/nexus_persistence.rs +++ b/io-engine/src/bdev/nexus/nexus_persistence.rs @@ -104,6 +104,15 @@ impl<'n> Nexus<'n> { }; nexus_info.children.push(child_info); }); + // We started with this child because it was healthy in etcd, or isn't there at all. + // Being unhealthy here means it is undergoing a fault/retire before nexus is open. + if nexus_info.children.len() == 1 && !nexus_info.children[0].healthy { + warn!("{self:?} Not persisting: the only child went unhealthy during nexus creation"); + return Err(Error::NexusCreate { + name: self.name.clone(), + reason: "only child is unhealthy".to_string(), + }); + } } PersistOp::AddChild { child_uri,