fix: avoid delayed MHC replacement of unreachable nodes

Co-authored-by: Michael Shen <[email protected]>
kubernetes-sigs · Jun 17, 2024 · dfc884a
1 parent 240ea95
commit dfc884a
Showing 1 changed file with 12 additions and 2 deletions.
diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go
@@ -671,8 +671,18 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
 	}
 
 	if noderefutil.IsNodeUnreachable(node) {
-		// When the node is unreachable and some pods are not evicted for as long as this timeout, we ignore them.
-		drainer.SkipWaitForDeleteTimeoutSeconds = 60 * 5 // 5 minutes
+		// Kubelet is unreachable, pods will never disappear.
+
+		// SkipWaitForDeleteTimeoutSeconds ensures the drain completes
+		// even if pod objects are not deleted.
+		drainer.SkipWaitForDeleteTimeoutSeconds = 1
+
+		// kube-apiserver sets the `deletionTimestamp` to a future date computed using the grace period.
+		// We are effectively waiting for GracePeriodSeconds + SkipWaitForDeleteTimeoutSeconds.
+		// Override the grace period of pods to reduce the time needed to skip them.
+		drainer.GracePeriodSeconds = 1
+
+		log.V(5).Info("Node is unreachable, draining will ignore gracePeriod. PDBs are still honored.")
 	}
 
 	if err := kubedrain.RunCordonOrUncordon(drainer, node, true); err != nil {