From 855be7f5e7f8d69600d6c67fc0b37d1f4db0f413 Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Tue, 5 Nov 2024 16:18:52 +0200 Subject: [PATCH] Propagate VR conditions .Message When a VR condition is not met, we set the protected PVC condition message using the error message returned from isVRConditionMet(). When using csi-addons > 0.10.0, we now use the message from the condition instead of the default message. Since the Validated condition is not reported by older versions of csi-addons, and we must wait until the Validated condition status is known when VRG is deleted, isVRConditionMet() now also returns the state of the condition, which can be: - missing: condition not found - stale: observed generation does not match object generation - unknown: the special "Unknown" value - known: status is True or False When we validate the Validated condition we have these cases: - Condition is missing: continue to the next condition. - Condition is met: continue to the next condition. - Condition is not met and its status is False: This VR will never complete and it is safe to delete since replication will never start. If VRG is deleted, we return true since the VR reached the desired state. Otherwise we return false. In this case we update the protected PVC condition with the message from the VR condition. - Condition is not met and is stale or unknown: we need to check again later. There is no point in checking the Completed condition since a VR cannot complete without validation. In this case we update the protected PVC condition with the message generated by isVRConditionMet() for stale or unknown conditions. 
Example protected pvc DataReady condition with propagated message when VR validation failed: conditions: - lastTransitionTime: "2024-11-06T15:33:06Z" message: 'failed to meet prerequisite: rpc error: code = FailedPrecondition desc = system is not in a state required for the operation''s execution: failed to enable mirroring on image "replicapool/csi-vol-fe2ca7f8-713c-4c51-bf52-0d4b2c11d329": parent image "replicapool/csi-snap-e2114105-b451-469b-ad97-eb3cbe2af54e" is not enabled for mirroring' observedGeneration: 1 reason: Error status: "False" type: DataReady Signed-off-by: Nir Soffer --- internal/controller/vrg_volrep.go | 91 +++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 29 deletions(-) diff --git a/internal/controller/vrg_volrep.go b/internal/controller/vrg_volrep.go index 5b41ab12f..ee2e2695f 100644 --- a/internal/controller/vrg_volrep.go +++ b/internal/controller/vrg_volrep.go @@ -1430,20 +1430,27 @@ func (v *VRGInstance) checkVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *v // // We handle 3 cases: // - Primary deleted VRG: If Validated condition exists and false, the VR will never complete and can be -// deleted safely. Otherwise Completed condition is checked. -// - Primary VRG: Completed condition is checked. +// deleted safely. +// - Primary VRG: Validated condition is checked, and if successful the Completed conditions is also checked. // - Secondary VRG: Completed, Degraded and Resyncing conditions are checked and ensured healthy. func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *volrep.VolumeReplication, state ramendrv1alpha1.ReplicationState, ) bool { - // Check validated for primary during VRG deletion. 
- if state == ramendrv1alpha1.Primary && rmnutil.ResourceIsDeleted(v.instance) { - validated, ok := v.validateVRValidatedStatus(volRep) - if !validated && ok { - v.log.Info(fmt.Sprintf("VolumeReplication %s/%s failed validation and can be deleted", - volRep.GetName(), volRep.GetNamespace())) + // If primary, check the validated condition. + if state == ramendrv1alpha1.Primary { + validated, condState := v.validateVRValidatedStatus(pvc, volRep) + if !validated && condState != conditionMissing { + // If the condition is known, this VR will never complete since it failed initial validation. + if condState == conditionKnown { + v.log.Info(fmt.Sprintf("VolumeReplication %s/%s failed validation and can be deleted", + volRep.GetName(), volRep.GetNamespace())) + + // If the VRG is deleted the VR has reached the desired state. + return rmnutil.ResourceIsDeleted(v.instance) + } - return true + // The condition is stale or unknown so we need to check again later. + return false } } @@ -1467,19 +1474,25 @@ func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep return true } -// validateVRValidatedStatus validates that VolumeReplicaion resource was validated. -// Return 2 booleans +// validateVRValidatedStatus validates that VolumeReplication resource was validated. +// Returns 2 values: // - validated: true if the condition is true, otherwise false -// - ok: true if the check was succeesfull, false if the condition is missing, stale, or unknown. 
+// - state: condition state func (v *VRGInstance) validateVRValidatedStatus( + pvc *corev1.PersistentVolumeClaim, volRep *volrep.VolumeReplication, -) (bool, bool) { - conditionMet, errorMsg := isVRConditionMet(volRep, volrep.ConditionValidated, metav1.ConditionTrue) - if errorMsg != "" { - v.log.Info(fmt.Sprintf("%s (VolRep: %s/%s)", errorMsg, volRep.GetName(), volRep.GetNamespace())) +) (bool, conditionState) { + conditionMet, condState, errorMsg := isVRConditionMet(volRep, volrep.ConditionValidated, metav1.ConditionTrue) + if !conditionMet && condState != conditionMissing { + defaultMsg := "VolumeReplication resource not validated" + v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, errorMsg, + defaultMsg) + v.updatePVCDataProtectedConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, errorMsg, + defaultMsg) + v.log.Info(fmt.Sprintf("%s (VolRep: %s/%s)", defaultMsg, volRep.GetName(), volRep.GetNamespace())) } - return conditionMet, errorMsg == "" + return conditionMet, condState } // validateVRCompletedStatus validates if the VolumeReplication resource Completed condition is met and update @@ -1502,7 +1515,7 @@ func (v *VRGInstance) validateVRCompletedStatus(pvc *corev1.PersistentVolumeClai action = "demoted" } - conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionCompleted, metav1.ConditionTrue) + conditionMet, _, msg := isVRConditionMet(volRep, volrep.ConditionCompleted, metav1.ConditionTrue) if !conditionMet { defaultMsg := fmt.Sprintf("VolumeReplication resource for pvc not %s to %s", action, stateString) v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg, @@ -1539,12 +1552,12 @@ func (v *VRGInstance) validateAdditionalVRStatusForSecondary(pvc *corev1.Persist ) bool { v.updatePVCLastSyncCounters(pvc.Namespace, pvc.Name, nil) - conditionMet, _ := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionTrue) + conditionMet, _, _ := 
isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionTrue) if !conditionMet { return v.checkResyncCompletionAsSecondary(pvc, volRep) } - conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionTrue) + conditionMet, _, msg := isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionTrue) if !conditionMet { defaultMsg := "VolumeReplication resource for pvc is not in Degraded condition while resyncing" v.updatePVCDataProtectedConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg, @@ -1573,7 +1586,7 @@ func (v *VRGInstance) validateAdditionalVRStatusForSecondary(pvc *corev1.Persist func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVolumeClaim, volRep *volrep.VolumeReplication, ) bool { - conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionFalse) + conditionMet, _, msg := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionFalse) if !conditionMet { defaultMsg := "VolumeReplication resource for pvc not syncing as Secondary" v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg, @@ -1587,7 +1600,7 @@ func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVol return false } - conditionMet, msg = isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionFalse) + conditionMet, _, msg = isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionFalse) if !conditionMet { defaultMsg := "VolumeReplication resource for pvc is not syncing and is degraded as Secondary" v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg, @@ -1611,35 +1624,55 @@ func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVol return true } -// isVRConditionMet returns true if the condition is met, and an error mesage if we could not get the -// condition value. 
+type conditionState string + +const ( + conditionMissing = conditionState("missing") + conditionStale = conditionState("stale") + conditionUnknown = conditionState("unknown") + conditionKnown = conditionState("known") +) + +// isVRConditionMet check if condition is met. +// Returns 3 values: +// - met: true if the condition status matches the desired status, otherwise false +// - state: one of (conditionMissing, conditionStale, conditionUnknown, conditionKnown) +// generation, and its value is not unknown. +// - errorMsg: error message describing why the condition is not met func isVRConditionMet(volRep *volrep.VolumeReplication, conditionType string, desiredStatus metav1.ConditionStatus, -) (bool, string) { +) (bool, conditionState, string) { + met := true + volRepCondition := findCondition(volRep.Status.Conditions, conditionType) if volRepCondition == nil { errorMsg := fmt.Sprintf("Failed to get the %s condition from status of VolumeReplication resource.", conditionType) - return false, errorMsg + return !met, conditionMissing, errorMsg } if volRep.GetGeneration() != volRepCondition.ObservedGeneration { errorMsg := fmt.Sprintf("Stale generation for condition %s from status of VolumeReplication resource.", conditionType) - return false, errorMsg + return !met, conditionStale, errorMsg } if volRepCondition.Status == metav1.ConditionUnknown { errorMsg := fmt.Sprintf("Unknown status for condition %s from status of VolumeReplication resource.", conditionType) - return false, errorMsg + return !met, conditionUnknown, errorMsg + } + + if volRepCondition.Status != desiredStatus { + // csi-addons > 0.10.0 returns detailed error message + return !met, conditionKnown, volRepCondition.Message } - return volRepCondition.Status == desiredStatus, "" + return met, conditionKnown, "" } // Disabling unparam linter as currently every invokation of this