Try #549:
mayastor-bors committed May 25, 2023
2 parents 098c25d + 3ad0bc4 · commit 44b291c
Showing 3 changed files with 165 additions and 8 deletions.
@@ -34,9 +34,7 @@ impl TaskPoller for GarbageCollector {
                 Err(_) => continue,
             };
             // if the nexus is in shutdown state don't let the reconcilers act on it.
-            if !nexus.lock().is_shutdown() {
-                let _ = nexus.garbage_collect(context).await;
-            }
+            let _ = nexus.garbage_collect(context).await;
         }
         PollResult::Ok(PollerState::Idle)
     }
165 changes: 161 additions & 4 deletions control-plane/agents/src/bin/core/tests/volume/switchover.rs
@@ -2,22 +2,179 @@

 use deployer_cluster::{Cluster, ClusterBuilder};
 use grpc::operations::{
-    pool::traits::PoolOperations, registry::traits::RegistryOperations,
-    replica::traits::ReplicaOperations, volume::traits::VolumeOperations,
+    nexus::traits::NexusOperations, pool::traits::PoolOperations,
+    registry::traits::RegistryOperations, replica::traits::ReplicaOperations,
+    volume::traits::VolumeOperations,
 };
 use std::{collections::HashMap, time::Duration};
 use stor_port::{
     transport_api::{ReplyErrorKind, ResourceKind},
     types::v0::{
         openapi::models::SpecStatus,
         store::nexus::NexusSpec,
         transport::{
-            CreateVolume, DestroyShutdownTargets, Filter, GetSpecs, Nexus, PublishVolume,
-            RepublishVolume, VolumeShareProtocol,
+            CreateVolume, DestroyShutdownTargets, DestroyVolume, Filter, GetSpecs, Nexus,
+            PublishVolume, RepublishVolume, VolumeShareProtocol,
         },
     },
 };
+use tokio::time::sleep;

// This test:
// - Creates a three-io-engine cluster.
// - Creates a volume with 2 replicas and publishes it to create a nexus.
// - Republishes the volume to a new node.
// - Stops the node housing the previous nexus.
// - Destroys the volume.
// - Starts the stopped node and expects the old nexus to be cleaned up.
#[tokio::test]
async fn old_nexus_delete_after_vol_destroy() {
let reconcile_period = Duration::from_secs(1);
let cluster = ClusterBuilder::builder()
.with_rest(true)
.with_agents(vec!["core"])
.with_io_engines(3)
.with_tmpfs_pool(POOL_SIZE_BYTES)
.with_cache_period("1s")
.with_reconcile_period(reconcile_period, reconcile_period)
.build()
.await
.unwrap();

let vol_client = cluster.grpc_client().volume();
let nexus_client = cluster.grpc_client().nexus();
let rest_client = cluster.rest_v00();
let spec_client = rest_client.specs_api();

let vol = vol_client
.create(
&CreateVolume {
uuid: "ec4e66fd-3b33-4439-b504-d49aba53da26".try_into().unwrap(),
size: 5242880,
replicas: 2,
..Default::default()
},
None,
)
.await
.expect("unable to create volume");

let vol = vol_client
.publish(
&PublishVolume {
uuid: vol.uuid().clone(),
share: None,
target_node: Some(cluster.node(0)),
publish_context: HashMap::new(),
frontend_nodes: vec![cluster.node(1).to_string()],
},
None,
)
.await
.expect("failed to publish volume");

let target_node = vol.state().target.unwrap().node;
assert_eq!(target_node, cluster.node(0));

let vol = vol_client
.republish(
&RepublishVolume {
uuid: vol.uuid().clone(),
target_node: Some(cluster.node(1)),
share: VolumeShareProtocol::Nvmf,
reuse_existing: false,
frontend_node: cluster.node(1),
reuse_existing_fallback: false,
},
None,
)
.await
.expect("unable to republish volume");

let republished_node = vol.state().target.unwrap().node;
assert_ne!(
republished_node, target_node,
"expected nexus node to change post republish"
);

let nexuses = nexus_client
.get(Filter::None, None)
.await
.expect("Could not list nexuses");
assert_eq!(nexuses.0.len(), 2, "expected two nexuses after republish");

cluster
.composer()
.stop(cluster.node(0).as_str())
.await
.expect("failed to stop container");

sleep(Duration::from_secs(2)).await;

let nexuses_after_stop = nexus_client
    .get(Filter::None, None)
    .await
    .expect("could not list nexuses");

assert_eq!(
    nexuses_after_stop.0.len(),
    1,
    "expected 1 nexus after stop"
);

let spec = spec_client
.get_specs()
.await
.expect("expected to retrieve specs");

let nexus_spec = spec.nexuses;
assert_eq!(nexus_spec.len(), 2, "spec should contain 2 nexuses");

vol_client
.destroy(
&DestroyVolume {
uuid: vol.uuid().clone(),
},
None,
)
.await
.expect("failed to delete volume");

let spec = spec_client
.get_specs()
.await
.expect("expected to retrieve specs");

let nexus_spec = spec.nexuses;
assert_eq!(
nexus_spec.len(),
1,
"spec should contain one nexus after vol destroy with old target node unreachable"
);

let n = nexus_spec.get(0).unwrap();
assert_eq!(
n.status,
SpecStatus::Deleting,
"failed to mark nexus spec as Deleting"
);

cluster
.composer()
.start(cluster.node(0).as_str())
.await
.expect("failed to start container");
sleep(Duration::from_secs(5)).await;

let spec = cluster
.rest_v00()
.specs_api()
.get_specs()
.await
.expect("expected to retrieve specs");

let nexus_spec = spec.nexuses;
assert_eq!(nexus_spec.len(), 0, "spec should not contain any nexus");
}

#[tokio::test]
async fn lazy_delete_shutdown_targets() {
const POOL_SIZE_BYTES: u64 = 128 * 1024 * 1024;
4 changes: 3 additions & 1 deletion control-plane/agents/src/bin/core/volume/operations.rs
@@ -149,7 +149,9 @@ impl ResourceLifecycle for OperationGuardArc<VolumeSpec> {
                 let nexus = nexus_arc.lock().deref().clone();
                 match nexus_arc.operation_guard_wait().await {
                     Ok(mut guard) => {
-                        let destroy = DestroyNexus::from(&nexus).with_disown(&request.uuid);
+                        let destroy = DestroyNexus::from(&nexus)
+                            .with_disown(&request.uuid)
+                            .with_lazy(true);
                         if let Err(error) = guard.destroy(registry, &destroy).await {
                             nexus.warn_span(|| {
                                 tracing::warn!(
