fix(nexus/replica): check with replica owner before destroying it
A user hit a very odd situation where two nexuses had been created containing the same replica.
How can this happen? One potential cause is fixed here: we now collect the volume state AFTER
we take the volume guard. That is a very tight race though, so I suspect something else might
still be at play here.

Regardless of how this can happen, we now plug the hole by ensuring we always check with the
volume replica removal logic before attempting to disown and destroy a replica.

Signed-off-by: Tiago Castro <[email protected]>
tiagolobocastro committed Jul 14, 2023
1 parent ea6eb96 commit e4edf5d
Showing 4 changed files with 49 additions and 56 deletions.
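The ordering change called out in the commit message (take the volume guard first, collect the volume state second) is the core of the first hunk below. A minimal sketch of that pattern, using stand-in types rather than the project's real VolumeSpec/registry API, with a plain std::sync::Mutex standing in for the operation guard:

// Minimal sketch with stand-in types (not the project's real API): acquire the
// per-volume guard first, and only then snapshot the state the reconciler will
// act on, so the snapshot cannot already be stale by the time the guard is held.
use std::sync::{Arc, Mutex, TryLockError};

#[derive(Clone, Debug)]
struct VolumeSpec {
    self_heal: bool,
    num_replicas: u8,
}

fn reconcile(volume: &Arc<Mutex<VolumeSpec>>) -> Result<(), &'static str> {
    // 1. Take the guard (a try_lock here, standing in for the operation guard);
    //    if another reconcile step holds it, report "busy" and retry later.
    let guard = match volume.try_lock() {
        Ok(guard) => guard,
        Err(TryLockError::WouldBlock) => return Err("busy"),
        Err(TryLockError::Poisoned(_)) => return Err("poisoned"),
    };
    // 2. Snapshot the spec while the guard is held; the decisions below are
    //    based on state that no concurrent operation can be changing.
    let snapshot = guard.clone();
    if !snapshot.self_heal {
        return Ok(());
    }
    println!("would reconcile towards {} replica(s)", snapshot.num_replicas);
    Ok(())
}

fn main() {
    let volume = Arc::new(Mutex::new(VolumeSpec {
        self_heal: true,
        num_replicas: 2,
    }));
    reconcile(&volume).unwrap();
}

With the old ordering, the snapshot could describe replicas that a concurrent operation was already reassigning, which is one way two nexuses could end up referencing the same replica.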
@@ -48,21 +48,24 @@ async fn hot_spare_reconcile(
     volume_spec: &Arc<Mutex<VolumeSpec>>,
     context: &PollContext,
 ) -> PollResult {
-    let uuid = volume_spec.lock().uuid.clone();
-    let volume_state = context.registry().get_volume_state(&uuid).await?;
     let _guard = match volume_spec.operation_guard(OperationMode::ReconcileStart) {
         Ok(guard) => guard,
         Err(_) => return PollResult::Ok(PollerState::Busy),
     };
-    let mode = OperationMode::ReconcileStep;
-
-    if !volume_spec.lock().policy.self_heal {
+    let volume_spec_cln = volume_spec.lock().clone();
+    if !volume_spec_cln.policy.self_heal {
         return PollResult::Ok(PollerState::Idle);
     }
-    if !volume_spec.lock().status.created() {
+    if !volume_spec_cln.status.created() {
         return PollResult::Ok(PollerState::Idle);
     }
 
+    let volume_state = context
+        .registry()
+        .get_volume_spec_state(volume_spec_cln)
+        .await?;
+    let mode = OperationMode::ReconcileStep;
+
     match volume_state.status {
         VolumeStatus::Online => volume_replica_count_reconciler(volume_spec, context, mode).await,
         VolumeStatus::Unknown | VolumeStatus::Degraded => {
46 changes: 16 additions & 30 deletions control-plane/agents/core/src/nexus/specs.rs
@@ -5,7 +5,7 @@ use crate::core::{
 };
 use common::errors::{NexusNotFound, SvcError};
 use common_lib::{
-    mbus_api::{ErrorChain, ResourceKind},
+    mbus_api::ResourceKind,
     types::v0::{
         message_bus::{
             AddNexusChild, AddNexusReplica, Child, ChildUri, CreateNexus, DestroyNexus, Nexus,
@@ -16,7 +16,7 @@ use common_lib::{
             nexus::{NexusOperation, NexusSpec},
             nexus_child::NexusChild,
             replica::ReplicaSpec,
-            OperationMode, SpecStatus, SpecTransaction, TraceSpan,
+            OperationMode, SpecStatus, SpecTransaction,
         },
     },
 };
@@ -433,37 +433,23 @@ impl ResourceSpecsLocked {
                     .ok_or(SvcError::ReplicaNotFound {
                         replica_id: replica.uuid().clone(),
                     })?;
-                let pool_id = replica_spec.lock().pool.clone();
-                match Self::get_pool_node(registry, pool_id).await {
-                    Some(node) => {
-                        if let Err(error) = self
-                            .disown_and_destroy_replica(registry, &node, replica.uuid())
-                            .await
-                        {
-                            nexus_spec.lock().clone().error_span(|| {
-                                tracing::error!(
-                                    replica.uuid = %replica.uuid(),
-                                    error = %error.full_string(),
-                                    "Failed to disown and destroy the replica"
-                                )
-                            });
-                        }
-                    }
-                    None => {
-                        // The replica node was not found (perhaps because it is offline).
-                        // The replica can't be destroyed because the node isn't there.
-                        // Instead, disown the replica from the volume and let the garbage
-                        // collector destroy it later.
-                        nexus_spec.lock().clone().warn_span(|| {
-                            tracing::warn!(
-                                replica.uuid = %replica.uuid(),
-                                "Failed to find the node where the replica is hosted"
-                            )
-                        });
-                        let _ = self.disown_volume_replica(registry, &replica_spec).await;
-                    }
-                }
+                if !replica_spec.lock().owners.owned_by_a_nexus() {
+                    let owner_volume = {
+                        let replica_spec = replica_spec.lock();
+                        replica_spec.owners.volume().cloned()
+                    };
+                    if let Some(owner) = owner_volume {
+                        if let Some(volume) = self.get_locked_volume(&owner) {
+                            self.remove_unused_volume_replica(
+                                registry,
+                                &volume,
+                                replica.uuid(),
+                                mode,
+                            )
+                            .await?;
+                        }
+                    }
+                }
 
                 Ok(())
             }
             result => result,
11 changes: 11 additions & 0 deletions control-plane/agents/core/src/volume/registry.rs
@@ -22,6 +22,17 @@ impl Registry {
             .await
     }
 
+    /// Get the volume state for the specified volume spec.
+    pub(crate) async fn get_volume_spec_state(
+        &self,
+        volume_spec: VolumeSpec,
+    ) -> Result<VolumeState, SvcError> {
+        let replica_specs = self.specs().get_cloned_volume_replicas(&volume_spec.uuid);
+
+        self.get_volume_state_with_replicas(&volume_spec, &replica_specs)
+            .await
+    }
+
     /// Get the volume state for the specified volume
     #[tracing::instrument(level = "info", skip(self, volume_spec, replicas))]
     pub(crate) async fn get_volume_state_with_replicas(
33 changes: 13 additions & 20 deletions control-plane/agents/core/src/volume/specs.rs
@@ -655,6 +655,8 @@ impl ResourceSpecsLocked {
         {
             Ok(_) => Ok(()),
             Err(error) if !request.force() => Err(error),
+            Err(error @ SvcError::Conflict {}) => Err(error),
+            Err(error @ SvcError::StoreSave { .. }) => Err(error),
             Err(error) => {
                 let node_online = match registry.get_node_wrapper(&nexus_clone.node).await {
                     Ok(node) => {
@@ -1053,6 +1055,17 @@ impl ResourceSpecsLocked {
             .await
         {
             Ok(_) => Ok(()),
+            Err(SvcError::GrpcRequestError {
+                source,
+                request,
+                resource,
+            }) if source.code() == tonic::Code::DeadlineExceeded => {
+                Err(SvcError::GrpcRequestError {
+                    source,
+                    request,
+                    resource,
+                })
+            }
             Err(error) => {
                 if let Some(replica) = self.get_replica(&replica.uuid) {
                     let mut replica = replica.lock();
@@ -1333,26 +1346,6 @@ impl ResourceSpecsLocked {
             .await
     }
 
-    /// Disown and destroy the replica from its volume
-    pub(crate) async fn disown_and_destroy_replica(
-        &self,
-        registry: &Registry,
-        node: &NodeId,
-        replica_uuid: &ReplicaId,
-    ) -> Result<(), SvcError> {
-        if let Some(replica) = self.get_replica(replica_uuid) {
-            // disown it from the volume first, so at the very least it can be garbage collected
-            // at a later point if the node is not accessible
-            self.disown_volume_replica(registry, &replica).await?;
-            self.destroy_volume_replica(registry, Some(node), &replica)
-                .await
-        } else {
-            Err(SvcError::ReplicaNotFound {
-                replica_id: replica_uuid.to_owned(),
-            })
-        }
-    }
-
     /// Remove volume by its `id`
     pub(super) fn remove_volume(&self, id: &VolumeId) {
         let mut specs = self.write();
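The other half of the fix is the rule that the nexus path never disowns and destroys a replica directly: if the replica still has a volume owner, the request is routed through the volume's unused-replica removal logic (remove_unused_volume_replica above). A rough sketch of that dispatch, using hypothetical stand-in types rather than the control plane's real ones:

// Rough sketch with hypothetical types: route replica removal through the
// owner instead of destroying the replica directly from the nexus path.
#[derive(Debug)]
enum Owner {
    Volume(String),
    Nexus(String),
    None,
}

#[derive(Debug)]
struct Replica {
    uuid: String,
    owner: Owner,
}

fn remove_replica(replica: &Replica) {
    match &replica.owner {
        // Still owned by a volume: defer to the volume's own replica-removal
        // logic, which decides whether the replica is actually unused.
        Owner::Volume(volume) => {
            println!("defer removal of {} to volume {}", replica.uuid, volume)
        }
        // Still owned by a nexus: leave it to the nexus tear-down path.
        Owner::Nexus(nexus) => {
            println!("replica {} is still used by nexus {}", replica.uuid, nexus)
        }
        // Unowned: safe to destroy now (or leave for the garbage collector).
        Owner::None => println!("destroy unowned replica {}", replica.uuid),
    }
}

fn main() {
    let replica = Replica {
        uuid: "r-1".into(),
        owner: Owner::Volume("v-1".into()),
    };
    remove_replica(&replica);
}

Only the unowned branch maps to an outright destroy; anything still owned is left for its owner, or for the garbage collector, to deal with.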
