Skip to content

Commit

Permalink
fix(nexus/io): set nvmf subsystem as frozen
Browse files Browse the repository at this point in the history
Multiple children may be retired at "the same time". If the first retire "wins" and
pauses the subsystem, and we then simply leave it paused at the end, the other retires
will remain stuck in the pause.
Instead, introduce a new Frozen state, which allows the other pauses to execute their
paused code while leaving the subsystem frozen at the end.

Signed-off-by: Tiago Castro <[email protected]>
  • Loading branch information
tiagolobocastro committed Jun 19, 2023
1 parent 5e08424 commit 5980b1e
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 23 deletions.
23 changes: 13 additions & 10 deletions mayastor/src/bdev/nexus/nexus_bdev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,19 @@ impl<'n> Nexus<'n> {
/// Note: in order to handle concurrent resumes properly, this function must
/// be called only from the master core.
pub async fn resume(self: Pin<&mut Self>) -> Result<(), Error> {
self.io_subsystem_mut().resume().await
// If we are faulted then rather than failing all IO back to the
// initiator we can instead leave the subsystem frozen, and wait
// for the control-plane to do something about this.
// Meanwhile the initiator will begin its reconnect loop and won't see
// a swarm of IO failures which could cause a fs to shutdown.
let freeze = match self.status() {
NexusStatus::Faulted => {
tracing::warn!(?self, "Nexus Faulted: will not resume I/Os");
true
}
_ => false,
};
self.io_subsystem_mut().resume(freeze).await
}

/// Suspend any incoming IO to the bdev pausing the controller allows us to
Expand Down Expand Up @@ -1030,15 +1042,6 @@ impl<'n> Nexus<'n> {
})))
.await;
}
// If we are faulted then rather than failing all IO back to the
// initiator we can instead leave the subsystem paused, and wait
// for the control-plane to do something about this.
// Meanwhile the initiator will begin it's reconnect loop and won't see
// a swarm of IO failures which could cause a fs to shutdown.
if self.status() == NexusStatus::Faulted {
tracing::warn!(?self, "Nexus Faulted: not resuming subsystem");
return Ok(());
}
debug!(?self, "RESUMING");
self.resume().await
}
Expand Down
41 changes: 28 additions & 13 deletions mayastor/src/bdev/nexus/nexus_io_subsystem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pub(super) enum NexusPauseState {
Unpaused,
Pausing,
Paused,
Frozen,
Unpausing,
}

Expand Down Expand Up @@ -99,7 +100,7 @@ impl<'n> NexusIoSubsystem<'n> {
break;
}
// Subsystem is already paused, increment number of paused.
Err(NexusPauseState::Paused) => {
Err(NexusPauseState::Paused | NexusPauseState::Frozen) => {
trace!(nexus=%self.name, "nexus is already paused, incrementing refcount");
self.pause_cnt.fetch_add(1, Ordering::SeqCst);
break;
Expand Down Expand Up @@ -133,13 +134,17 @@ impl<'n> NexusIoSubsystem<'n> {
/// Resume IO to the bdev.
/// Note: in order to handle concurrent resumes properly, this function must
/// be called only from the master core.
pub(super) async fn resume(&mut self) -> Result<(), NexusError> {
pub(super) async fn resume(
&mut self,
freeze: bool,
) -> Result<(), NexusError> {
assert_eq!(Cores::current(), Cores::first());

trace!(?self.name, "resuming nexus I/O");

loop {
match self.pause_state.load() {
let state = self.pause_state.load();
match state {
// Already unpaused, bail out.
NexusPauseState::Unpaused => {
break;
Expand All @@ -154,20 +159,30 @@ impl<'n> NexusIoSubsystem<'n> {
trace!(?self.name, "completed state transition, retrying Resume operation");
}
// Unpause the subsystem, taking into account the overall number
// of pauses.
NexusPauseState::Paused => {
// of pauses, or leave it frozen.
NexusPauseState::Paused | NexusPauseState::Frozen => {
let v = self.pause_cnt.fetch_sub(1, Ordering::SeqCst);
// In case the last pause discarded, resume the subsystem.
if v == 1 {
if let Some(subsystem) =
NvmfSubsystem::nqn_lookup(&self.name)
{
self.pause_state.store(NexusPauseState::Unpausing);
trace!(nexus=%self.name, nqn=%subsystem.get_nqn(), "resuming subsystem");
subsystem.resume().await.unwrap();
trace!(nexus=%self.name, nqn=%subsystem.get_nqn(), "subsystem resumed");
if state == NexusPauseState::Frozen || freeze {
if let Some(subsystem) =
NvmfSubsystem::nqn_lookup(&self.name)
{
trace!(nexus=%self.name, nqn=%subsystem.get_nqn(), "not resuming frozen subsystem");
}
self.pause_state.store(NexusPauseState::Frozen);
} else {
if let Some(subsystem) =
NvmfSubsystem::nqn_lookup(&self.name)
{
self.pause_state
.store(NexusPauseState::Unpausing);
trace!(nexus=%self.name, nqn=%subsystem.get_nqn(), "resuming subsystem");
subsystem.resume().await.unwrap();
trace!(nexus=%self.name, nqn=%subsystem.get_nqn(), "subsystem resumed");
}
self.pause_state.store(NexusPauseState::Unpaused);
}
self.pause_state.store(NexusPauseState::Unpaused);
}
break;
}
Expand Down

0 comments on commit 5980b1e

Please sign in to comment.