Skip to content

Commit

Permalink
fix(nexus): do not persist faulted state of the last replica
Browse files Browse the repository at this point in the history
In case the last replica fails, it is not marked as not healthy
in the ETCD, which allows Caontrol Plane to recover the nexus later
without ambiguity (as the replica with the most recent user data
remains known).

Resolves: CAS-1284
  • Loading branch information
mtzaurus committed Jan 31, 2022
1 parent 37e4ab0 commit 4112646
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 4 deletions.
39 changes: 37 additions & 2 deletions mayastor/src/bdev/nexus/nexus_bdev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -967,8 +967,43 @@ impl Nexus {
// schedule the deletion of the child eventhough etcd has not been
// updated yet we do not need to wait for that to
// complete anyway.
MWQ.enqueue(Command::RemoveDevice(self.name.clone(), name));
self.persist(PersistOp::Update((uri.clone(), child.state())))
MWQ.enqueue(Command::RemoveDevice(self.name.clone(), name.clone()));

// Do not persist child state in case it's the last healthy child of
// the nexus: let Control Plane reconstruct the nexus
// using this device as the replica with the most recent
// user data.
self.persist(PersistOp::UpdateCond(
(uri.clone(), child.state(), &|nexus_info| {
// Determine the amount of healthy replicas in the persistent state and
// check against the last healthy replica remaining.
let num_healthy = nexus_info.children.iter().fold(0, |n, c| {
if c.healthy {
n + 1
} else {
n
}
});

match num_healthy {
0 => {
warn!(
"nexus {}: no healthy replicas persent in persistent store when retiring replica {}:
not persisting the replica state",
&name, &uri,
);
false
}
1 => {
warn!(
"nexus {}: retiring the last healthy replica {}, not persisting the replica state",
&name, &uri,
);
false
},
_ => true,
}
})))
.await;
}
self.resume().await
Expand Down
25 changes: 23 additions & 2 deletions mayastor/src/bdev/nexus/nexus_persistence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,25 @@ pub struct ChildInfo {
}

/// Defines the type of persist operations.
pub(crate) enum PersistOp {
pub(crate) enum PersistOp<'a> {
/// Create a persistent entry.
Create,
/// Add a child to an existing persistent entry.
AddChild((ChildUri, ChildState)),
/// Update a persistent entry.
Update((ChildUri, ChildState)),
/// Update a persistent entry only when a precondition on this NexusInfo
/// holds. Predicate is called under protection of the NexusInfo lock,
/// so the check is assumed to be atomic and not interfering with other
/// modifications of the same NexusInfo.
UpdateCond((ChildUri, ChildState, &'a dyn Fn(&NexusInfo) -> bool)),
/// Save the clean shutdown variable.
Shutdown,
}

impl Nexus {
/// Persist information to the store.
pub(crate) async fn persist(&self, op: PersistOp) {
pub(crate) async fn persist(&self, op: PersistOp<'_>) {
if !PersistentStore::enabled() {
return;
}
Expand Down Expand Up @@ -87,6 +92,22 @@ impl Nexus {
}
});
}
// Only update the state of the child if the precondition holds.
PersistOp::UpdateCond((uri, state, f)) => {
// Do not persist the state if predicate fails.
if !f(&nexus_info) {
return;
}

let uuid =
NexusChild::uuid(&uri).expect("Failed to get child UUID.");

nexus_info.children.iter_mut().for_each(|c| {
if c.uuid == uuid {
c.healthy = Self::child_healthy(&state);
}
});
}
PersistOp::Shutdown => {
// Only update the clean shutdown variable. Do not update the
// child state information.
Expand Down

0 comments on commit 4112646

Please sign in to comment.