Skip to content

Commit

Permalink
fix(nvmx): reentrant controller event notification
Browse files Browse the repository at this point in the history
Current implementation of NVMe controller events could lead to deadlocks,
when event listener being notified indirectly triggers the second event notification
action for the same NVMe controller, since NVme controller performs event notification
under protection of a mutex.
This fix makes event invocation deadlock-free.

Signed-off-by: Mikhail Tcymbaliuk <[email protected]>
  • Loading branch information
mtzaurus committed Nov 28, 2022
1 parent f429f45 commit 06c1cbb
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 152 deletions.
185 changes: 141 additions & 44 deletions mayastor/src/bdev/nexus/nexus_bdev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//! application needs synchronous mirroring may be required.
use std::{
cmp::min,
fmt::{Display, Formatter},
marker::PhantomPinned,
os::raw::c_void,
Expand Down Expand Up @@ -38,6 +39,7 @@ use crate::{
nexus::{nexus_persistence::PersistentNexusInfo, NexusIoSubsystem},
},
core::{
partition,
Bdev,
BdevHandle,
Command,
Expand Down Expand Up @@ -725,6 +727,83 @@ impl<'n> Nexus<'n> {
);
}

/// Configure nexus's block device to match parameters of the child devices.
async fn setup_nexus_bdev(mut self: Pin<&mut Self>) -> Result<(), Error> {
let name = self.name.clone();

if self.children.is_empty() {
return Err(Error::NexusIncomplete {
name,
});
}

// Determine Nexus block size and data start and end offsets.
let mut start_blk = 0;
let mut end_blk = 0;
let mut blk_size = 0;

for child in self.children.iter() {
let dev = match child.get_device() {
Ok(dev) => dev,
Err(_) => {
return Err(Error::NexusIncomplete {
name,
})
}
};

let nb = dev.num_blocks();
let bs = dev.block_len();

if blk_size == 0 {
blk_size = bs;
} else if bs != blk_size {
return Err(Error::MixedBlockSizes {
name: self.name.clone(),
});
}

match partition::calc_data_partition(self.req_size, nb, bs) {
Some((start, end)) => {
if start_blk == 0 {
start_blk = start;
end_blk = end;
} else {
end_blk = min(end_blk, end);

if start_blk != start {
return Err(Error::ChildGeometry {
child: child.name.clone(),
name,
});
}
}
}
None => {
return Err(Error::ChildTooSmall {
child: child.name.clone(),
name,
num_blocks: nb,
block_size: bs,
})
}
}
}

unsafe {
self.as_mut().set_data_ent_offset(start_blk);
self.as_mut().set_block_len(blk_size as u32);
self.as_mut().set_num_blocks(end_blk - start_blk);
}

info!(
"{:?}: nexus device initialized: start_blk={}, end_blk={}, block_len={}",
self, start_blk, end_blk, blk_size,
);

Ok(())
}

/// Opens the Nexus instance for IO.
/// Once this function is called, the device is visible and can
/// be used for IO.
Expand All @@ -734,50 +813,48 @@ impl<'n> Nexus<'n> {
let mut nex = bdev.data_mut();
assert_eq!(*nex.state.lock(), NexusState::Init);

debug!("Opening nexus {}", nex.name);
info!("{:?}: registering nexus bdev...", nex);

nex.as_mut().try_open_children().await?;
nex.as_mut().setup_nexus_bdev().await?;

// Register the bdev with SPDK and set the callbacks for io channel
// creation.
nex.register_io_device(Some(&nex.name));

debug!(
"{}: IO device registered at {:p}",
nex.name, &*nex as *const Nexus
);
info!("{:?}: IO device registered", nex);

if let Err(err) = bdev.register_bdev() {
error!(
"{:?}: nexus bdev registration failed: {}",
nex,
err.verbose()
);
return Err(err).context(RegisterNexus {
name: nex.name.clone(),
});
}

unsafe { nex.as_mut().get_unchecked_mut().has_io_device = true };

match bdev.register_bdev() {
match nex.as_mut().try_open_children().await {
Ok(_) => {
// Persist the fact that the nexus is now successfully open.
// We have to do this before setting the nexus to open so that
// nexus list does not return this nexus until it is persisted.
nex.persist(PersistOp::Create).await;
nex.as_mut().set_state(NexusState::Open);
unsafe { nex.get_unchecked_mut().has_io_device = true };
Ok(())
info!("{:?}: children opened successfully", nex);
}
Err(err) => {
unsafe {
for child in
nex.as_mut().get_unchecked_mut().children.iter_mut()
{
if let Err(e) = child.close().await {
error!(
"{}: child {} failed to close with error {}",
bdev.data_mut().name,
child.get_name(),
e.verbose()
);
}
}
}
nex.as_mut().set_state(NexusState::Closed);
Err(err).context(RegisterNexus {
name: nex.name.clone(),
})
error!("{:?} failed to open children: {}", nex, err.verbose());
bdev.unregister_bdev();
return Err(err);
}
}
};

// Persist the fact that the nexus is now successfully open.
// We have to do this before setting the nexus to open so that
// nexus list does not return this nexus until it is persisted.
nex.persist(PersistOp::Create).await;
nex.as_mut().set_state(NexusState::Open);
info!("{:?}: nexus bdev registered successfully", nex);

Ok(())
}

/// Destroy the Nexus.
Expand Down Expand Up @@ -1411,36 +1488,56 @@ async fn nexus_create_internal(
}
}

// let ni = nexus_bdev.data_mut();
match Nexus::register_instance(&mut nexus_bdev).await {
Err(Error::NexusIncomplete {
..
name, ..
}) => {
// We still have code that waits for children to come online,
// although this currently only works for config files.
// We need to explicitly clean up child devices
// if we get this error.
error!(
"failed to open nexus {}: not all children are available",
name
"{:?}: not all child devices are available",
nexus_bdev.data()
);
let ni = nexus_bdev.data();
for child in ni.children.iter() {

let uris = nexus_bdev
.data()
.children
.iter()
.map(|c| c.name.clone())
.collect::<Vec<_>>();

for u in uris {
// TODO: children may already be destroyed
// TODO: mutability violation
let _ = device_destroy(&child.name).await;
if let Err(e) = device_destroy(&u).await {
error!(
"{:?}: failed to destroy child device {}: {:?}",
nexus_bdev.data(),
u,
e,
);
}
}

Err(Error::NexusCreate {
name: String::from(name),
name,
})
}

Err(error) => {
error!("failed to open nexus {}: {}", name, error);
error!(
"{:?}: failed to open nexus: {:?}",
nexus_bdev.data(),
error
);
nexus_bdev.data_mut().close_children().await;
Err(error)
}

Ok(_) => Ok(()),
Ok(_) => {
info!("{:?}: nexus created ok", nexus_bdev.data());
Ok(())
}
}
}
Loading

0 comments on commit 06c1cbb

Please sign in to comment.