From b250a6d933e725601b188d8f526fb741a862bf56 Mon Sep 17 00:00:00 2001 From: Colin Jones Date: Wed, 3 Mar 2021 16:56:49 +0000 Subject: [PATCH] fix(nexus): mayastor hangs when creating a nexus if there is an error creating a child When creating a new nexus, as a final step, the newly created nexus is added to a global list of nexus instances. This list is required when handling bdev removal - specifically it is used by lookup_child_from_bdev() to determine the nexus child that is associated with a given bdev. The problem occurs when there is an error creating a nexus, and proper cleanup necessitates the removal of some children that may have already been successfully created. The removal code requires the owning nexus to be present in the global list in order to successfully remove the children, but the nexus has not yet been added to the list, as it has not been successfully created. The solution is to add the (partially created) nexus to the list of global instances as early as possible in the creation process. This means that we also need to ensure that it is removed again if any error is encountered. resolves CAS-757 --- control-plane/tests/tests/nexus.rs | 7 +- mayastor/src/bdev/nexus/nexus_bdev.rs | 105 +++++++++++------- .../src/bdev/nexus/nexus_bdev_children.rs | 8 +- mayastor/tests/child_size.rs | 78 +++++++++++++ 4 files changed, 147 insertions(+), 51 deletions(-) create mode 100644 mayastor/tests/child_size.rs diff --git a/control-plane/tests/tests/nexus.rs b/control-plane/tests/tests/nexus.rs index 537b34fd8..e9386412b 100644 --- a/control-plane/tests/tests/nexus.rs +++ b/control-plane/tests/tests/nexus.rs @@ -1,5 +1,3 @@ -#![feature(allow_fail)] - pub mod common; use common::*; @@ -19,14 +17,11 @@ async fn create_nexus_malloc() { .unwrap(); } -// FIXME: CAS-737 #[actix_rt::test] -#[allow_fail] async fn create_nexus_sizes() { let cluster = ClusterBuilder::builder() .with_rest_timeout(std::time::Duration::from_secs(1)) - // don't log whilst we have the allow_fail - .compose_build(|c| c.with_logs(false)) + .compose_build(|c| c.with_logs(true)) .await .unwrap(); diff --git a/mayastor/src/bdev/nexus/nexus_bdev.rs b/mayastor/src/bdev/nexus/nexus_bdev.rs index 9bf7dfe0a..53193fc02 100644 --- a/mayastor/src/bdev/nexus/nexus_bdev.rs +++ b/mayastor/src/bdev/nexus/nexus_bdev.rs @@ -11,7 +11,7 @@ use std::{ os::raw::c_void, }; -use futures::channel::oneshot; +use futures::{channel::oneshot, future::join_all}; use nix::errno::Errno; use serde::Serialize; use snafu::{ResultExt, Snafu}; @@ -424,10 +424,7 @@ impl Nexus { max_io_attempts: cfg.err_store_opts.max_io_attempts, }); - n.bdev.set_uuid(match uuid { - Some(uuid) => Some(uuid.to_string()), - None => None, - }); + n.bdev.set_uuid(uuid.map(String::from)); if let Some(child_bdevs) = child_bdevs { n.register_children(child_bdevs); @@ -1033,11 +1030,13 @@ impl Nexus { } } -/// If we fail to create one of the children we will fail the whole operation -/// destroy any created children and return the error. Once created, and we -/// bring the nexus online, there still might be a configuration mismatch that -/// would prevent the nexus to come online. We can only determine this -/// (currently) when online, so we check the errors twice for now. +/// Create a new nexus and bring it online. +/// If we fail to create any of the children, then we fail the whole operation. +/// On failure, we must cleanup by destroying any children that were +/// successfully created. Also, once the nexus is created, there still might +/// be a configuration mismatch that would prevent us from going online. +/// Currently, we can only determine this once we are already online, +/// and so we check the errors twice for now. #[tracing::instrument(level = "debug")] pub async fn nexus_create( name: &str, @@ -1047,61 +1046,85 @@ pub async fn nexus_create( ) -> Result<(), Error> { // global variable defined in the nexus module let nexus_list = instances(); + if nexus_list.iter().any(|n| n.name == name) { - // instead of error we return Ok without making sure that also the - // children are the same, which seems wrong + // FIXME: Instead of error, we return Ok without checking + // that the children match, which seems wrong. return Ok(()); } - let mut ni = Nexus::new(name, size, uuid, None); + // Create a new Nexus object, and immediately add it to the global list. + // This is necessary to ensure proper cleanup, as the code responsible for + // closing a child assumes that the nexus to which it belongs will appear + // in the global list of nexus instances. We must also ensure that the + // nexus instance gets removed from the global list if an error occurs. + nexus_list.push(Nexus::new(name, size, uuid, None)); + + // Obtain a reference to the newly created Nexus object. + let ni = + nexus_list + .iter_mut() + .find(|n| n.name == name) + .ok_or_else(|| Error::NexusNotFound { + name: String::from(name), + })?; for child in children { - if let Err(err) = ni.create_and_register(child).await { - error!("failed to create child {}: {}", child, err); - ni.destroy_children().await; - return Err(err).context(CreateChild { - name: ni.name.clone(), + if let Err(error) = ni.create_and_register(child).await { + error!( + "failed to create nexus {}: failed to create child {}: {}", + name, child, error + ); + ni.close_children().await; + nexus_list.retain(|n| n.name != name); + return Err(Error::CreateChild { + source: error, + name: String::from(name), }); } } match ni.open().await { - // we still have code that waits for children to come online - // this however only works for config files so we need to clean up - // if we get the below error Err(Error::NexusIncomplete { .. }) => { - info!("deleting nexus due to missing children"); - for child in children { - if let Err(e) = bdev_destroy(child).await { - error!("failed to destroy child during cleanup {}", e); - } - } - - return Err(Error::NexusCreate { + // We still have code that waits for children to come online, + // although this currently only works for config files. + // We need to explicitly clean up child bdevs if we get this error. + error!("failed to open nexus {}: missing children", name); + destroy_child_bdevs(name, children).await; + nexus_list.retain(|n| n.name != name); + Err(Error::NexusCreate { name: String::from(name), - }); + }) } - Err(e) => { - error!("failed to open nexus {}: {}", ni.name, e); - ni.destroy_children().await; - return Err(e); + Err(error) => { + error!("failed to open nexus {}: {}", name, error); + ni.close_children().await; + nexus_list.retain(|n| n.name != name); + Err(error) } - Ok(_) => nexus_list.push(ni), + Ok(_) => Ok(()), + } +} + +/// Destroy list of child bdevs +async fn destroy_child_bdevs(name: &str, list: &[String]) { + let futures = list.iter().map(String::as_str).map(bdev_destroy); + let results = join_all(futures).await; + if results.iter().any(|c| c.is_err()) { + error!("{}: Failed to destroy child bdevs", name); } - Ok(()) } /// Lookup a nexus by its name (currently used only by test functions). pub fn nexus_lookup(name: &str) -> Option<&mut Nexus> { - if let Some(nexus) = instances().iter_mut().find(|n| n.name == name) { - Some(nexus) - } else { - None - } + instances() + .iter_mut() + .find(|n| n.name == name) + .map(AsMut::as_mut) } impl Display for Nexus { diff --git a/mayastor/src/bdev/nexus/nexus_bdev_children.rs b/mayastor/src/bdev/nexus/nexus_bdev_children.rs index 49be55e6e..d8443d688 100644 --- a/mayastor/src/bdev/nexus/nexus_bdev_children.rs +++ b/mayastor/src/bdev/nexus/nexus_bdev_children.rs @@ -345,13 +345,13 @@ impl Nexus { }) } } - /// destroy all children that are part of this nexus closes any child - /// that might be open first - pub(crate) async fn destroy_children(&mut self) { + + /// Close each child that belongs to this nexus. + pub(crate) async fn close_children(&mut self) { let futures = self.children.iter_mut().map(|c| c.close()); let results = join_all(futures).await; if results.iter().any(|c| c.is_err()) { - error!("{}: Failed to destroy child", self.name); + error!("{}: Failed to close children", self.name); } } diff --git a/mayastor/tests/child_size.rs b/mayastor/tests/child_size.rs new file mode 100644 index 000000000..40aead580 --- /dev/null +++ b/mayastor/tests/child_size.rs @@ -0,0 +1,78 @@ +use tracing::error; + +use once_cell::sync::OnceCell; + +use common::MayastorTest; +use mayastor::{ + bdev::{nexus_create, nexus_lookup}, + core::{Bdev, MayastorCliArgs}, +}; + +pub mod common; + +async fn create_nexus(size: u64) -> bool { + let children = vec![ + String::from("malloc:///m0?size_mb=32"), + format!("malloc:///m1?size_mb={}", size), + ]; + if let Err(error) = + nexus_create("core_nexus", size * 1024 * 1024, None, &children).await + { + error!("nexus_create() failed: {}", error); + return false; + } + true +} + +static MS: OnceCell = OnceCell::new(); + +fn mayastor() -> &'static MayastorTest<'static> { + let ms = MS.get_or_init(|| MayastorTest::new(MayastorCliArgs::default())); + &ms +} + +#[tokio::test] +async fn child_size_ok() { + mayastor() + .spawn(async { + assert_eq!(Bdev::bdev_first().into_iter().count(), 0); + assert!(create_nexus(16).await); + + let bdev = Bdev::lookup_by_name("core_nexus").unwrap(); + assert_eq!(bdev.name(), "core_nexus"); + + let bdev = + Bdev::lookup_by_name("m0").expect("child bdev m0 not found"); + assert_eq!(bdev.name(), "m0"); + + let bdev = + Bdev::lookup_by_name("m1").expect("child bdev m1 not found"); + assert_eq!(bdev.name(), "m1"); + + let nexus = nexus_lookup("core_nexus").expect("nexus not found"); + nexus.destroy().await.unwrap(); + + assert!(nexus_lookup("core_nexus").is_none()); + assert!(Bdev::lookup_by_name("core_nexus").is_none()); + assert!(Bdev::lookup_by_name("m0").is_none()); + assert!(Bdev::lookup_by_name("m1").is_none()); + assert_eq!(Bdev::bdev_first().into_iter().count(), 0); + }) + .await; +} + +#[tokio::test] +async fn child_too_small() { + mayastor() + .spawn(async { + assert_eq!(Bdev::bdev_first().into_iter().count(), 0); + assert!(!create_nexus(4).await); + + assert!(nexus_lookup("core_nexus").is_none()); + assert!(Bdev::lookup_by_name("core_nexus").is_none()); + assert!(Bdev::lookup_by_name("m0").is_none()); + assert!(Bdev::lookup_by_name("m1").is_none()); + assert_eq!(Bdev::bdev_first().into_iter().count(), 0); + }) + .await; +}