Skip to content

Commit

Permalink
fix(nexus): fix
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Savitskiy <[email protected]>
  • Loading branch information
dsavitskiy committed Dec 2, 2023
1 parent 4e24a31 commit 55ba1bb
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 13 deletions.
19 changes: 14 additions & 5 deletions io-engine/src/bdev/nexus/nexus_bdev_children.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ impl<'n> Nexus<'n> {
// Detach the child from the I/O path, and close its handles.
if let Some(device) = child.get_device_name() {
self.detach_device(&device).await;
self.disconnect_detached_devices().await;
self.disconnect_all_detached_devices().await;
}

// Close child's device.
Expand Down Expand Up @@ -985,6 +985,13 @@ impl<'n> Nexus<'n> {
// in order to prevent an I/O race when retiring a device.
self.detach_device(&dev).await;

info!("d'conn ctrlr-failed...");
self.traverse_io_channels_async((), |channel, _| {
channel.disconnect_detached_devices(|h| h.is_ctrlr_failed());
})
.await;
info!("d'conn ctrlr-failed ok");

// Disconnect, destroy and close the device. The subsystem must be
// paused to do this properly.
{
Expand All @@ -994,7 +1001,9 @@ impl<'n> Nexus<'n> {

// Disconnect the previously detached device handles. This has to be
// done after the pause to prevent an I/O race.
self.disconnect_detached_devices().await;
info!("d'conn all...");
self.disconnect_all_detached_devices().await;
info!("d'conn all ok");

res?;

Expand Down Expand Up @@ -1090,11 +1099,11 @@ impl<'n> Nexus<'n> {

/// Disconnects all the detached devices on all I/O channels by dropping
/// their handles.
pub(crate) async fn disconnect_detached_devices(&self) {
pub(crate) async fn disconnect_all_detached_devices(&self) {
debug!("{self:?}: disconnecting all detached devices ...");

self.traverse_io_channels_async((), |channel, _| {
channel.disconnect_detached_devices();
channel.disconnect_detached_devices(|_| true);
})
.await;

Expand Down Expand Up @@ -1174,7 +1183,7 @@ impl<'n> Nexus<'n> {
// Step 1: Close I/O channels for all children.
for dev in nexus.child_devices() {
nexus.detach_device(&dev).await;
nexus.disconnect_detached_devices().await;
nexus.disconnect_all_detached_devices().await;

device_cmd_queue().enqueue(DeviceCommand::RetireDevice {
nexus_name: nexus.name.clone(),
Expand Down
37 changes: 30 additions & 7 deletions io-engine/src/bdev/nexus/nexus_channel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,14 +240,37 @@ impl<'n> NexusChannel<'n> {
debug!("{self:?}: device '{device_name}' detached");
}

/// Disconnects all previously detached device handles by dropping them.
pub(super) fn disconnect_detached_devices(&mut self) {
debug!(
"{self:?}: disconnecting {n} detached device handles...",
n = self.detached.len()
/// Disconnects previously detached device handles by dropping them.
/// Only the handles selected by the given predicate are dropped: a handle
/// is dropped when the predicate returns `true` for it, and kept otherwise.
pub(super) fn disconnect_detached_devices<F>(&mut self, mut drop_pred: F)
where
F: FnMut(&dyn BlockDeviceHandle) -> bool,
{
let cnt = self
.detached
.iter()
.filter(|h| drop_pred(h.as_ref()))
.count();

if cnt == 0 {
debug!("{self:?}: no devices to disconnect");
return;
}

let n = self.detached.len();

info!(
"{self:?}: disconnecting {cnt} of {n} detached device handles...",
);

self.detached.retain(|h| !drop_pred(h.as_ref()));

info!(
"{self:?}: {cnt} of {n} detached device handles disconnected, \
{m} remain(s)",
m = self.detached.len()
);
self.detached.clear();
debug!("{self:?}: all detached device handles disconnected");
}

/// Refreshing our channels simply means that we either have a child going
Expand Down
2 changes: 1 addition & 1 deletion io-engine/src/bdev/nexus/nexus_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ impl<'n> NexusBio<'n> {
self.ctx_mut().failed += 1;

self.channel_mut().detach_device(&device);
self.channel_mut().disconnect_detached_devices();
self.channel_mut().disconnect_detached_devices(|_| true);

if let Some(log) = self.fault_device(
&device,
Expand Down
2 changes: 2 additions & 0 deletions io-engine/src/bdev/nvmx/controller_inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,8 @@ impl SpdkNvmeController {
}
}

/// Returns a raw pointer to the underlying SPDK `spdk_nvme_ctrlr` struct.
///
/// The pointer is obtained by calling `as_ptr()` on the wrapped value
/// (`self.0`); this method only exposes it and does not dereference it.
#[inline(always)]
pub fn as_ptr(&self) -> *mut spdk_nvme_ctrlr {
self.0.as_ptr()
}
Expand Down
5 changes: 5 additions & 0 deletions io-engine/src/bdev/nvmx/handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1523,6 +1523,11 @@ impl BlockDeviceHandle for NvmeDeviceHandle {
let id = inner.ext_host_id();
Ok(*id)
}

/// Returns `true` if the controller behind this handle is marked as failed.
///
/// The value is read directly from the `is_failed` field of the handle's
/// controller (`self.ctrlr`).
fn is_ctrlr_failed(&self) -> bool {
self.ctrlr.is_failed
}
}

impl Drop for NvmeDeviceHandle {
Expand Down
5 changes: 5 additions & 0 deletions io-engine/src/core/block_device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,11 @@ pub trait BlockDeviceHandle {
cb: IoCompletionCallback,
cb_arg: IoCompletionCallbackArg,
) -> Result<(), CoreError>;

/// Returns `true` if the controller behind this device handle has failed.
///
/// The default implementation always returns `false`; implementors that
/// can observe a controller failure state are expected to override it.
fn is_ctrlr_failed(&self) -> bool {
false
}
}

fn block_device_io_completion(
Expand Down

0 comments on commit 55ba1bb

Please sign in to comment.