[Bugfix] Allow shards with RF1 in EnforcedResignLeadership action #1441

Merged · 3 commits · Oct 15, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -13,6 +13,7 @@
 - (Feature) EnforcedResignLeadership action
 - (Maintenance) Make scale_down_candidate annotation obsolete
 - (Bugfix) Fix ResignJob ID propagation
+- (Bugfix) Allow shards with RF1 in EnforcedResignLeadership action
 
 ## [1.2.33](https://github.com/arangodb/kube-arangodb/tree/1.2.33) (2023-09-27)
 - (Maintenance) Bump golang.org/x/net to v0.13.0
24 changes: 24 additions & 0 deletions pkg/deployment/agency/state/state.go
@@ -238,6 +238,30 @@ func (s State) PlanLeaderServers() Servers {
 	return r
 }
 
+// PlanLeaderServersWithFailOver returns all servers which are part of the plan as a leader and can fail over
+func (s State) PlanLeaderServersWithFailOver() Servers {
+	q := map[Server]bool{}
+
+	for _, db := range s.Plan.Collections {
+		for _, col := range db {
+			for _, shards := range col.Shards {
+				if len(shards) <= 1 {
+					continue
+				}
+				q[shards[0]] = true
+			}
+		}
+	}
+
+	r := make([]Server, 0, len(q))
+
+	for k := range q {
+		r = append(r, k)
+	}
+
+	return r
+}
+
 type CollectionShardDetails []CollectionShardDetail
 
 type CollectionShardDetail struct {
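The helper added above mirrors PlanLeaderServers but skips any shard whose replica list has a single entry (replicationFactor 1): with no follower, there is no server for leadership to fail over to. The standalone sketch below illustrates that selection rule with simplified stand-in types (a plain string for Server, a map from shard name to its leader-first replica list); it is not the operator's real agency state, just the same filtering logic in isolation.

// A minimal, self-contained sketch of the selection logic above, assuming
// simplified stand-in types instead of the operator's internal agency state.
package main

import "fmt"

type Server string

// planLeadersWithFailOver collects the leader (first replica) of every shard
// that has at least one follower, skipping shards with replicationFactor 1,
// which have nowhere to fail over.
func planLeadersWithFailOver(shards map[string][]Server) []Server {
	seen := map[Server]bool{}
	for _, replicas := range shards {
		if len(replicas) <= 1 {
			continue // RF1 shard: no follower, leadership cannot be resigned
		}
		seen[replicas[0]] = true
	}
	leaders := make([]Server, 0, len(seen))
	for s := range seen {
		leaders = append(leaders, s)
	}
	return leaders
}

func main() {
	shards := map[string][]Server{
		"s1": {"DB-A", "DB-B", "DB-C"}, // RF3: DB-A is a resignable leader
		"s2": {"DB-A"},                 // RF1: skipped
		"s3": {"DB-B", "DB-C"},         // RF2: DB-B is a resignable leader
	}
	// Map iteration order is random, so the slice order varies between runs.
	fmt.Println(planLeadersWithFailOver(shards)) // e.g. [DB-A DB-B]
}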
17 changes: 12 additions & 5 deletions pkg/deployment/reconcile/action_enforce_resign_leadership.go
@@ -103,30 +103,37 @@ func (a *actionEnforceResignLeadership) CheckProgress(ctx context.Context) (bool
 	}
 
 	// Lets start resign job if required
-	if j, ok := a.actionCtx.Get(a.action, resignLeadershipJobID); ok && j != "" {
+	if j, ok := a.actionCtx.Get(a.action, resignLeadershipJobID); ok && j != "" && j != "N/A" {
 		_, jobStatus := agencyState.Target.GetJob(state.JobID(j))
 		switch jobStatus {
 		case state.JobPhaseFailed:
 			a.log.Error("Resign server job failed")
 			// Remove key
-			a.actionCtx.Add(resignLeadershipJobID, "", true)
+			a.actionCtx.Add(resignLeadershipJobID, "N/A", true)
 			return false, false, nil
 		case state.JobPhaseFinished:
 			a.log.Info("Job finished")
 			// Remove key
-			a.actionCtx.Add(resignLeadershipJobID, "", true)
+			a.actionCtx.Add(resignLeadershipJobID, "N/A", true)
 		case state.JobPhaseUnknown:
 			a.log.Str("status", string(jobStatus)).Error("Resign server job unknown status")
 			return false, false, nil
 		default:
 			return false, false, nil
 		}
 
+		a.actionCtx.Add(resignLeadershipJobID, "N/A", true)
+
 		// Job is Finished, check if we are not a leader anymore
 		if agencyState.PlanLeaderServers().Contains(state.Server(m.ID)) {
 			// We are still a leader!
-			a.log.Warn("DBServers is still a leader for shards")
-			return false, false, nil
+			if agencyState.PlanLeaderServersWithFailOver().Contains(state.Server(m.ID)) {
+				// We need to retry
+				a.log.Warn("DBServer is still a leader for shards")
+				return false, false, nil
+			}
+			// Nothing to do as RF is set to 1
+			a.log.Warn("DBServer is still a leader for shards, but ReplicationFactor is set to 1")
 		}
 		return true, false, nil
 	}
Expand Down