Skip to content

Commit

Permalink
fix(nexus): fixing I/O race when disconnecting retired device handles
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Savitskiy <[email protected]>
  • Loading branch information
dsavitskiy committed Dec 4, 2023
1 parent 1d77b74 commit c54fe99
Show file tree
Hide file tree
Showing 8 changed files with 272 additions and 73 deletions.
14 changes: 14 additions & 0 deletions io-engine/src/bdev/nexus/nexus_bdev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,20 @@ impl<'n> Nexus<'n> {
unsafe { Pin::new_unchecked(self.bdev_mut()) }
}

/// Gets a nexus reference from an untyped bdev.
/// # Warning:
/// No checks are performed (e.g. bdev module name check), as it is assumed
/// that the provided bdev is a nexus bdev.
#[inline(always)]
pub(crate) unsafe fn unsafe_from_untyped_bdev(
bdev: spdk_rs::UntypedBdev,
) -> &'n Nexus<'n> {
spdk_rs::Bdev::<Nexus<'n>>::unsafe_from_inner_ptr(
bdev.unsafe_inner_ptr() as *mut _,
)
.data()
}

/// Sets the required alignment of the Nexus.
pub(crate) unsafe fn set_required_alignment(
self: Pin<&mut Self>,
Expand Down
70 changes: 56 additions & 14 deletions io-engine/src/bdev/nexus/nexus_bdev_children.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,9 +326,10 @@ impl<'n> Nexus<'n> {
// Close and remove the child.
let res = match self.lookup_child(uri) {
Some(child) => {
// Remove child from the I/O path.
// Detach the child from the I/O path, and close its handles.
if let Some(device) = child.get_device_name() {
self.disconnect_device(&device).await;
self.detach_device(&device).await;
self.disconnect_all_detached_devices().await;
}

// Close child's device.
Expand Down Expand Up @@ -974,18 +975,39 @@ impl<'n> Nexus<'n> {
return Ok(());
}

// Disconnect the device from all the channels.
// Detach the device from all the channels.
//
// After disconnecting, the device will no longer be used by the
// channels, and all I/Os failing due to this device will eventually
// resubmit and succeeded (if any healthy children are left).
self.disconnect_device(&dev).await;
//
// Device disconnection is done in two steps (detach, than disconnect)
// in order to prevent an I/O race when retiring a device.
self.detach_device(&dev).await;

// Disconnect the devices with failed controllers _before_ pause,
// otherwise pause would stuck. Keep all controoled that are _not_
// failed (e.g., in the case I/O failed due to ENOSPC).
self.traverse_io_channels_async((), |channel, _| {
channel.disconnect_detached_devices(|h| h.is_ctrlr_failed());
})
.await;

// Destroy (close) the device. The subsystem must be paused to do this
// properly.
// Disconnect, destroy and close the device. The subsystem must be
// paused to do this properly.
{
debug!("{self:?}: retire: pausing...");
self.as_mut().pause().await?;
debug!("{self:?}: retire: pausing ok");
let res = self.as_mut().pause().await;
match &res {
Ok(_) => debug!("{self:?}: retire: pausing ok"),
Err(e) => warn!("{self:?}: retire: pausing failed: {e}"),
};

// Disconnect the all previously detached device handles. This has
// to be done after the pause to prevent an I/O race.
self.disconnect_all_detached_devices().await;

res?;

self.child_retire_destroy_device(&dev).await;

Expand Down Expand Up @@ -1055,20 +1077,39 @@ impl<'n> Nexus<'n> {
Ok(())
}

/// Disconnects a device from all I/O channels.
pub(crate) async fn disconnect_device(&self, dev: &str) {
/// Detaches the device's handles from all I/O channels.
///
/// The detached handles must be disconnected and dropped by a
/// `disconnect_detached_devices()` call.
///
/// Device disconnection is done in two steps (detach, than disconnect) in
/// order to prevent an I/O race when retiring a device.
pub(crate) async fn detach_device(&self, dev: &str) {
if !self.has_io_device {
return;
}

debug!("{self:?}: disconnecting '{dev}' from all channels ...");
debug!("{self:?}: detaching '{dev}' from all channels...");

self.traverse_io_channels_async(dev, |channel, dev| {
channel.disconnect_device(dev);
channel.detach_device(dev);
})
.await;

debug!("{self:?}: '{dev}' detached from all I/O channels");
}

/// Disconnects all the detached devices on all I/O channels by dropping
/// their handles.
pub(crate) async fn disconnect_all_detached_devices(&self) {
debug!("{self:?}: disconnecting all detached devices ...");

self.traverse_io_channels_async((), |channel, _| {
channel.disconnect_detached_devices(|_| true);
})
.await;

debug!("{self:?}: '{dev}' disconnected from all I/O channels");
debug!("{self:?}: disconnected all detached devices");
}

/// Destroys the device being retired.
Expand Down Expand Up @@ -1143,7 +1184,8 @@ impl<'n> Nexus<'n> {

// Step 1: Close I/O channels for all children.
for dev in nexus.child_devices() {
nexus.disconnect_device(&dev).await;
nexus.detach_device(&dev).await;
nexus.disconnect_all_detached_devices().await;

device_cmd_queue().enqueue(DeviceCommand::RetireDevice {
nexus_name: nexus.name.clone(),
Expand Down
66 changes: 59 additions & 7 deletions io-engine/src/bdev/nexus/nexus_channel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use spdk_rs::Thread;
pub struct NexusChannel<'n> {
writers: Vec<Box<dyn BlockDeviceHandle>>,
readers: Vec<Box<dyn BlockDeviceHandle>>,
detached: Vec<Box<dyn BlockDeviceHandle>>,
io_logs: Vec<IOLogChannel>,
previous_reader: UnsafeCell<usize>,
fail_fast: u32,
Expand Down Expand Up @@ -123,6 +124,7 @@ impl<'n> NexusChannel<'n> {
Self {
writers,
readers,
detached: Vec::new(),
io_logs: nexus.io_log_channels(),
previous_reader: UnsafeCell::new(0),
nexus: unsafe { nexus.pinned_mut() },
Expand Down Expand Up @@ -209,16 +211,66 @@ impl<'n> NexusChannel<'n> {
}
}

/// Disconnects a child device from the I/O path.
pub fn disconnect_device(&mut self, device_name: &str) {
/// Detaches a child device from this I/O channel, moving the device's
/// handles to the list of detached devices to disconnect later.
///
/// The detached handles must be disconnected and dropped by a
/// `disconnect_detached_devices()` call.
pub(super) fn detach_device(&mut self, device_name: &str) {
self.previous_reader = UnsafeCell::new(0);

self.readers
.retain(|c| c.get_device().device_name() != device_name);
self.writers
.retain(|c| c.get_device().device_name() != device_name);
if let Some(d) = self
.readers
.iter()
.position(|c| c.get_device().device_name() == device_name)
{
let t = self.readers.remove(d);
self.detached.push(t);
}

if let Some(d) = self
.writers
.iter()
.position(|c| c.get_device().device_name() == device_name)
{
let t = self.writers.remove(d);
self.detached.push(t);
}

debug!("{self:?}: device '{device_name}' detached");
}

/// Disconnects previously detached device handles by dropping them.
/// Devices to drop are filtered by the given predicate: true to drop
/// a device, false to keep it.
pub(super) fn disconnect_detached_devices<F>(&mut self, mut drop_pred: F)
where
F: FnMut(&dyn BlockDeviceHandle) -> bool,
{
let cnt = self
.detached
.iter()
.filter(|h| drop_pred(h.as_ref()))
.count();

if cnt == 0 {
debug!("{self:?}: no devices to disconnect");
return;
}

debug!("{self:?}: device '{device_name}' disconnected");
let n = self.detached.len();

info!(
"{self:?}: disconnecting {cnt} of {n} detached device handles...",
);

self.detached.retain(|h| !drop_pred(h.as_ref()));

info!(
"{self:?}: {cnt} of {n} detached device handles disconnected, \
{m} remain(s)",
m = self.detached.len()
);
}

/// Refreshing our channels simply means that we either have a child going
Expand Down
3 changes: 2 additions & 1 deletion io-engine/src/bdev/nexus/nexus_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,8 @@ impl<'n> NexusBio<'n> {
// set the IO as failed in the submission stage.
self.ctx_mut().failed += 1;

self.channel_mut().disconnect_device(&device);
self.channel_mut().detach_device(&device);
self.channel_mut().disconnect_detached_devices(|_| true);

if let Some(log) = self.fault_device(
&device,
Expand Down
2 changes: 2 additions & 0 deletions io-engine/src/bdev/nvmx/controller_inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,8 @@ impl SpdkNvmeController {
}
}

/// Returns a pointer to the underlying SPDK struct.
#[inline(always)]
pub fn as_ptr(&self) -> *mut spdk_nvme_ctrlr {
self.0.as_ptr()
}
Expand Down
5 changes: 5 additions & 0 deletions io-engine/src/bdev/nvmx/handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1523,6 +1523,11 @@ impl BlockDeviceHandle for NvmeDeviceHandle {
let id = inner.ext_host_id();
Ok(*id)
}

/// Determines if the underlying controller is failed.
fn is_ctrlr_failed(&self) -> bool {
self.ctrlr.is_failed
}
}

impl Drop for NvmeDeviceHandle {
Expand Down
5 changes: 5 additions & 0 deletions io-engine/src/core/block_device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,11 @@ pub trait BlockDeviceHandle {
cb: IoCompletionCallback,
cb_arg: IoCompletionCallbackArg,
) -> Result<(), CoreError>;

/// Determines if the underlying controller is failed.
fn is_ctrlr_failed(&self) -> bool {
false
}
}

fn block_device_io_completion(
Expand Down
Loading

0 comments on commit c54fe99

Please sign in to comment.