diff --git a/pkg/actions/nodegroup/drain.go b/pkg/actions/nodegroup/drain.go index 7afd04f22b..fc748a5874 100644 --- a/pkg/actions/nodegroup/drain.go +++ b/pkg/actions/nodegroup/drain.go @@ -9,19 +9,20 @@ import ( ) type DrainInput struct { - NodeGroups []eks.KubeNodeGroup - Plan bool - MaxGracePeriod time.Duration - NodeDrainWaitPeriod time.Duration - Undo bool - DisableEviction bool - Parallel int + NodeGroups []eks.KubeNodeGroup + Plan bool + MaxGracePeriod time.Duration + NodeDrainWaitPeriod time.Duration + PodEvictionWaitPeriod time.Duration + Undo bool + DisableEviction bool + Parallel int } func (m *Manager) Drain(input *DrainInput) error { if !input.Plan { for _, n := range input.NodeGroups { - nodeGroupDrainer := drain.NewNodeGroupDrainer(m.clientSet, n, m.ctl.Provider.WaitTimeout(), input.MaxGracePeriod, input.NodeDrainWaitPeriod, input.Undo, input.DisableEviction, input.Parallel) + nodeGroupDrainer := drain.NewNodeGroupDrainer(m.clientSet, n, m.ctl.Provider.WaitTimeout(), input.MaxGracePeriod, input.NodeDrainWaitPeriod, input.PodEvictionWaitPeriod, input.Undo, input.DisableEviction, input.Parallel) if err := nodeGroupDrainer.Drain(); err != nil { return err } diff --git a/pkg/ctl/delete/nodegroup.go b/pkg/ctl/delete/nodegroup.go index 19461ffe2e..67f6d80033 100644 --- a/pkg/ctl/delete/nodegroup.go +++ b/pkg/ctl/delete/nodegroup.go @@ -17,30 +17,31 @@ import ( ) func deleteNodeGroupCmd(cmd *cmdutils.Cmd) { - deleteNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod time.Duration, disableEviction bool, parallel int) error { - return doDeleteNodeGroup(cmd, ng, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing, maxGracePeriod, disableEviction, parallel) + deleteNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error { + return doDeleteNodeGroup(cmd, ng, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing, maxGracePeriod, podEvictionWaitPeriod, disableEviction, parallel) }) } -func deleteNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod time.Duration, disableEviction bool, parallel int) error) { +func deleteNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error) { cfg := api.NewClusterConfig() ng := api.NewNodeGroup() cmd.ClusterConfig = cfg var ( - updateAuthConfigMap bool - deleteNodeGroupDrain bool - onlyMissing bool - maxGracePeriod time.Duration - disableEviction bool - parallel int + updateAuthConfigMap bool + deleteNodeGroupDrain bool + onlyMissing bool + maxGracePeriod time.Duration + podEvictionWaitPeriod time.Duration + disableEviction bool + parallel int ) cmd.SetDescription("nodegroup", "Delete a nodegroup", "", "ng") cmd.CobraCommand.RunE = func(_ *cobra.Command, args []string) error { cmd.NameArg = cmdutils.GetNameArg(args) - return runFunc(cmd, ng, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing, maxGracePeriod, disableEviction, parallel) + return runFunc(cmd, ng, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing, maxGracePeriod, podEvictionWaitPeriod, disableEviction, parallel) } 
cmd.FlagSetGroup.InFlagSet("General", func(fs *pflag.FlagSet) { @@ -55,6 +56,8 @@ func deleteNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cm fs.BoolVar(&deleteNodeGroupDrain, "drain", true, "Drain and cordon all nodes in the nodegroup before deletion") defaultMaxGracePeriod, _ := time.ParseDuration("10m") fs.DurationVar(&maxGracePeriod, "max-grace-period", defaultMaxGracePeriod, "Maximum pods termination grace period") + defaultPodEvictionWaitPeriod, _ := time.ParseDuration("10s") + fs.DurationVar(&podEvictionWaitPeriod, "pod-eviction-wait-period", defaultPodEvictionWaitPeriod, "Duration to wait after failing to evict a pod") defaultDisableEviction := false fs.BoolVar(&disableEviction, "disable-eviction", defaultDisableEviction, "Force drain to use delete, even if eviction is supported. This will bypass checking PodDisruptionBudgets, use with caution.") fs.IntVar(¶llel, "parallel", 1, "Number of nodes to drain in parallel. Max 25") @@ -67,7 +70,7 @@ func deleteNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cm cmdutils.AddCommonFlagsForAWS(cmd.FlagSetGroup, &cmd.ProviderConfig, true) } -func doDeleteNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod time.Duration, disableEviction bool, parallel int) error { +func doDeleteNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod time.Duration, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error { ngFilter := filter.NewNodeGroupFilter() if err := cmdutils.NewDeleteAndDrainNodeGroupLoader(cmd, ng, ngFilter).Load(); err != nil { @@ -128,11 +131,12 @@ func doDeleteNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap cmdutils.LogIntendedAction(cmd.Plan, "drain %d nodegroup(s) in cluster %q", len(allNodeGroups), cfg.Metadata.Name) drainInput := &nodegroup.DrainInput{ - NodeGroups: allNodeGroups, - Plan: cmd.Plan, - MaxGracePeriod: maxGracePeriod, - DisableEviction: disableEviction, - Parallel: parallel, + NodeGroups: allNodeGroups, + Plan: cmd.Plan, + MaxGracePeriod: maxGracePeriod, + PodEvictionWaitPeriod: podEvictionWaitPeriod, + DisableEviction: disableEviction, + Parallel: parallel, } err := nodeGroupManager.Drain(drainInput) if err != nil { diff --git a/pkg/ctl/delete/nodegroup_test.go b/pkg/ctl/delete/nodegroup_test.go index b46ec18019..fb42d8b3b5 100644 --- a/pkg/ctl/delete/nodegroup_test.go +++ b/pkg/ctl/delete/nodegroup_test.go @@ -22,7 +22,7 @@ var _ = Describe("delete", func() { cmd := newMockEmptyCmd(args...) 
count := 0 cmdutils.AddResourceCmd(cmdutils.NewGrouping(), cmd.parentCmd, func(cmd *cmdutils.Cmd) { - deleteNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *v1alpha5.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod time.Duration, disableEviction bool, parallel int) error { + deleteNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *v1alpha5.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error { Expect(cmd.ClusterConfig.Metadata.Name).To(Equal("clusterName")) Expect(ng.Name).To(Equal("ng")) count++ diff --git a/pkg/ctl/drain/nodegroup.go b/pkg/ctl/drain/nodegroup.go index 87e719be82..d3ad4e5f70 100644 --- a/pkg/ctl/drain/nodegroup.go +++ b/pkg/ctl/drain/nodegroup.go @@ -15,30 +15,31 @@ import ( ) func drainNodeGroupCmd(cmd *cmdutils.Cmd) { - drainNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod time.Duration, disableEviction bool, parallel int) error { - return doDrainNodeGroup(cmd, ng, undo, onlyMissing, maxGracePeriod, nodeDrainWaitPeriod, disableEviction, parallel) + drainNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error { + return doDrainNodeGroup(cmd, ng, undo, onlyMissing, maxGracePeriod, nodeDrainWaitPeriod, podEvictionWaitPeriod, disableEviction, parallel) }) } -func drainNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod time.Duration, disableEviction bool, parallel int) error) { +func drainNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error) { cfg := api.NewClusterConfig() ng := api.NewNodeGroup() cmd.ClusterConfig = cfg var ( - undo bool - onlyMissing bool - disableEviction bool - parallel int - maxGracePeriod time.Duration - nodeDrainWaitPeriod time.Duration + undo bool + onlyMissing bool + disableEviction bool + parallel int + maxGracePeriod time.Duration + nodeDrainWaitPeriod time.Duration + podEvictionWaitPeriod time.Duration ) cmd.SetDescription("nodegroup", "Cordon and drain a nodegroup", "", "ng") cmd.CobraCommand.RunE = func(_ *cobra.Command, args []string) error { cmd.NameArg = cmdutils.GetNameArg(args) - return runFunc(cmd, ng, undo, onlyMissing, maxGracePeriod, nodeDrainWaitPeriod, disableEviction, parallel) + return runFunc(cmd, ng, undo, onlyMissing, maxGracePeriod, nodeDrainWaitPeriod, podEvictionWaitPeriod, disableEviction, parallel) } cmd.FlagSetGroup.InFlagSet("General", func(fs *pflag.FlagSet) { @@ -52,6 +53,8 @@ func drainNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd fs.BoolVar(&undo, "undo", false, "Uncordon the nodegroup") defaultMaxGracePeriod, _ := time.ParseDuration("10m") fs.DurationVar(&maxGracePeriod, "max-grace-period", defaultMaxGracePeriod, "Maximum pods termination grace period") + defaultPodEvictionWaitPeriod, _ := time.ParseDuration("10s") + fs.DurationVar(&podEvictionWaitPeriod, "pod-eviction-wait-period", defaultPodEvictionWaitPeriod, "Duration to wait after failing to evict a pod") defaultDisableEviction := false fs.BoolVar(&disableEviction, 
"disable-eviction", defaultDisableEviction, "Force drain to use delete, even if eviction is supported. This will bypass checking PodDisruptionBudgets, use with caution.") cmdutils.AddTimeoutFlag(fs, &cmd.ProviderConfig.WaitTimeout) @@ -62,7 +65,7 @@ func drainNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd cmdutils.AddCommonFlagsForAWS(cmd.FlagSetGroup, &cmd.ProviderConfig, true) } -func doDrainNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod time.Duration, disableEviction bool, parallel int) error { +func doDrainNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod time.Duration, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error { ngFilter := filter.NewNodeGroupFilter() if err := cmdutils.NewDeleteAndDrainNodeGroupLoader(cmd, ng, ngFilter).Load(); err != nil { @@ -125,13 +128,14 @@ func doDrainNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bo allNodeGroups := cmdutils.ToKubeNodeGroups(cfg) drainInput := &nodegroup.DrainInput{ - NodeGroups: allNodeGroups, - Plan: cmd.Plan, - MaxGracePeriod: maxGracePeriod, - NodeDrainWaitPeriod: nodeDrainWaitPeriod, - Undo: undo, - DisableEviction: disableEviction, - Parallel: parallel, + NodeGroups: allNodeGroups, + Plan: cmd.Plan, + MaxGracePeriod: maxGracePeriod, + NodeDrainWaitPeriod: nodeDrainWaitPeriod, + PodEvictionWaitPeriod: podEvictionWaitPeriod, + Undo: undo, + DisableEviction: disableEviction, + Parallel: parallel, } return nodegroup.New(cfg, ctl, clientSet).Drain(drainInput) } diff --git a/pkg/ctl/drain/nodegroup_test.go b/pkg/ctl/drain/nodegroup_test.go index 103c160750..463c36e443 100644 --- a/pkg/ctl/drain/nodegroup_test.go +++ b/pkg/ctl/drain/nodegroup_test.go @@ -24,7 +24,7 @@ var _ = Describe("drain node group", func() { cmd := newMockEmptyCmd(args...) 
count := 0 cmdutils.AddResourceCmd(cmdutils.NewGrouping(), cmd.parentCmd, func(cmd *cmdutils.Cmd) { - drainNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *v1alpha5.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod time.Duration, disableEviction bool, parallel int) error { + drainNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *v1alpha5.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error { Expect(cmd.ClusterConfig.Metadata.Name).To(Equal("clusterName")) Expect(ng.Name).To(Equal("ng")) count++ diff --git a/pkg/drain/evictor/pod.go b/pkg/drain/evictor/pod.go index 031dad10c3..fcd517eef2 100644 --- a/pkg/drain/evictor/pod.go +++ b/pkg/drain/evictor/pod.go @@ -29,7 +29,6 @@ import ( const ( daemonSetFatal = "DaemonSet-managed Pods (use --ignore-daemonsets to ignore)" - daemonSetWarning = "ignoring DaemonSet-managed Pods" localStorageFatal = "Pods with local storage (use --delete-local-data to override)" localStorageWarning = "deleting Pods with local storage" unmanagedFatal = "Pods not managed by ReplicationController, ReplicaSet, Job, DaemonSet or StatefulSet (use --force to override)" @@ -204,7 +203,7 @@ func (d *Evictor) daemonSetFilter(pod corev1.Pod) PodDeleteStatus { if controllerRef.Name == ignoreDaemonSet.Name { switch ignoreDaemonSet.Namespace { case pod.Namespace, metav1.NamespaceAll: - return makePodDeleteStatusWithWarning(false, daemonSetWarning) + return makePodDeleteStatusSkip() } } } @@ -213,7 +212,7 @@ func (d *Evictor) daemonSetFilter(pod corev1.Pod) PodDeleteStatus { return makePodDeleteStatusWithError(daemonSetFatal) } - return makePodDeleteStatusWithWarning(false, daemonSetWarning) + return makePodDeleteStatusSkip() } func (d *Evictor) mirrorPodFilter(pod corev1.Pod) PodDeleteStatus { diff --git a/pkg/drain/nodegroup.go b/pkg/drain/nodegroup.go index 35f1b5b380..49eb28a54f 100644 --- a/pkg/drain/nodegroup.go +++ b/pkg/drain/nodegroup.go @@ -3,8 +3,10 @@ package drain import ( "context" "fmt" + "strings" "time" + "golang.org/x/sync/errgroup" "golang.org/x/sync/semaphore" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -27,6 +29,13 @@ import ( // retryDelay is how long is slept before retry after an error occurs during drainage const retryDelay = 5 * time.Second +var recoverablePodEvictionErrors = [...]string{ + "Cannot evict pod as it would violate the pod's disruption budget", + "TooManyRequests", + "NotFound", + "not found", +} + //go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 -generate //counterfeiter:generate -o fakes/fake_evictor.go . 
Evictor type Evictor interface { @@ -36,16 +45,17 @@ type Evictor interface { } type NodeGroupDrainer struct { - clientSet kubernetes.Interface - evictor Evictor - ng eks.KubeNodeGroup - waitTimeout time.Duration - nodeDrainWaitPeriod time.Duration - undo bool - parallel int + clientSet kubernetes.Interface + evictor Evictor + ng eks.KubeNodeGroup + waitTimeout time.Duration + nodeDrainWaitPeriod time.Duration + podEvictionWaitPeriod time.Duration + undo bool + parallel int } -func NewNodeGroupDrainer(clientSet kubernetes.Interface, ng eks.KubeNodeGroup, waitTimeout, maxGracePeriod, nodeDrainWaitPeriod time.Duration, undo, disableEviction bool, parallel int) NodeGroupDrainer { +func NewNodeGroupDrainer(clientSet kubernetes.Interface, ng eks.KubeNodeGroup, waitTimeout, maxGracePeriod, nodeDrainWaitPeriod time.Duration, podEvictionWaitPeriod time.Duration, undo, disableEviction bool, parallel int) NodeGroupDrainer { ignoreDaemonSets := []metav1.ObjectMeta{ { Namespace: "kube-system", @@ -73,13 +83,14 @@ func NewNodeGroupDrainer(clientSet kubernetes.Interface, ng eks.KubeNodeGroup, w } return NodeGroupDrainer{ - evictor: evictor.New(clientSet, maxGracePeriod, ignoreDaemonSets, disableEviction), - clientSet: clientSet, - ng: ng, - waitTimeout: waitTimeout, - nodeDrainWaitPeriod: nodeDrainWaitPeriod, - undo: undo, - parallel: parallel, + evictor: evictor.New(clientSet, maxGracePeriod, ignoreDaemonSets, disableEviction), + clientSet: clientSet, + ng: ng, + waitTimeout: waitTimeout, + nodeDrainWaitPeriod: nodeDrainWaitPeriod, + podEvictionWaitPeriod: podEvictionWaitPeriod, + undo: undo, + parallel: parallel, } } @@ -112,6 +123,7 @@ func (n *NodeGroupDrainer) Drain() error { parallelLimit := int64(n.parallel) sem := semaphore.NewWeighted(parallelLimit) logger.Info("starting parallel draining, max in-flight of %d", parallelLimit) + var evictErr error // loop until all nodes are drained to handle accidental scale-up // or any other changes in the ASG for { @@ -121,6 +133,9 @@ func (n *NodeGroupDrainer) Drain() error { waitForAllRoutinesToFinish(context.TODO(), sem, parallelLimit) return fmt.Errorf("timed out (after %s) waiting for nodegroup %q to be drained", n.waitTimeout, n.ng.NameString()) default: + if evictErr != nil { + return evictErr + } nodes, err := n.clientSet.CoreV1().Nodes().List(context.TODO(), listOptions) if err != nil { return err @@ -143,32 +158,37 @@ func (n *NodeGroupDrainer) Drain() error { logger.Debug("already drained: %v", mapToList(drainedNodes.Items())) logger.Debug("will drain: %v", newPendingNodes.List()) - for i, node := range newPendingNodes.List() { - if err := sem.Acquire(ctx, 1); err != nil { - logger.Critical("failed to acquire semaphore: %w", err) - } - go func(i int, node string) { + errorGroup, _ := errgroup.WithContext(ctx) + for _, node := range newPendingNodes.List() { + node := node + errorGroup.Go(func() error { + if err := sem.Acquire(ctx, 1); err != nil { + return errors.Wrapf(err, "failed to acquire semaphore") + } defer sem.Release(1) + + drainedNodes.Set(node, nil) logger.Debug("starting drain of node %s", node) - pending, err := n.evictPods(node) + err := n.evictPods(ctx, node) if err != nil { logger.Warning("pod eviction error (%q) on node %s", err, node) time.Sleep(retryDelay) - return + return err } - logger.Debug("%d pods to be evicted from %s", pending, node) - if pending == 0 { - drainedNodes.Set(node, nil) - } + drainedNodes.Set(node, nil) if n.nodeDrainWaitPeriod > 0 { logger.Debug("waiting for %.0f seconds before draining next node", 
n.nodeDrainWaitPeriod.Seconds()) time.Sleep(n.nodeDrainWaitPeriod) } - }(i, node) + return nil + }) } + // We need to loop even if this is an error to check whether the error was a + // context timeout or something else. This lets us log timeout errors consistently + evictErr = errorGroup.Wait() } } } @@ -204,29 +224,53 @@ func (n *NodeGroupDrainer) toggleCordon(cordon bool, nodes *corev1.NodeList) { logger.Debug("no need to %s node %q", cordonStatus(cordon), node.Name) } } - } -func (n *NodeGroupDrainer) evictPods(node string) (int, error) { - list, errs := n.evictor.GetPodsForEviction(node) - if len(errs) > 0 { - return 0, fmt.Errorf("errs: %v", errs) // TODO: improve formatting - } - if list == nil { - return 0, nil - } - if w := list.Warnings(); w != "" { - logger.Warning(w) - } - pods := list.Pods() - pending := len(pods) - for _, pod := range pods { - // TODO: handle API rate limiter error - if err := n.evictor.EvictOrDeletePod(pod); err != nil { - return pending, errors.Wrapf(err, "error evicting pod: %s/%s", pod.Namespace, pod.Name) +func (n *NodeGroupDrainer) evictPods(ctx context.Context, node string) error { + // Loop until the context times out. We want to continually try to remove pods + // from the node as their eviction status changes. + previousReportTime := time.Now() + for { + select { + case <-ctx.Done(): + return fmt.Errorf("timed out (after %s) waiting for node %q to be drained", n.waitTimeout, node) + default: + list, errs := n.evictor.GetPodsForEviction(node) + if len(errs) > 0 { + return fmt.Errorf("errs: %v", errs) // TODO: improve formatting + } + if list == nil { + return nil + } + pods := list.Pods() + if len(pods) == 0 { + return nil + } + if w := list.Warnings(); w != "" { + logger.Warning(w) + } + // This log message is important but can be noisy. It's useful to only + // log an update for a node once per minute. + if time.Now().Sub(previousReportTime) > time.Minute*1 && len(pods) > 0 { + logger.Warning("%d pods are unevictable from node %s", len(pods), node) + previousReportTime = time.Now() + } + logger.Debug("%d pods to be evicted from %s", len(pods), node) + failedEvictions := false + for _, pod := range pods { + if err := n.evictor.EvictOrDeletePod(pod); err != nil { + if !isEvictionErrorRecoverable(err) { + return errors.Wrapf(err, "unrecoverable error evicting pod: %s/%s", pod.Namespace, pod.Name) + } + logger.Debug("pod eviction failed recoverably: %q", err) + failedEvictions = true + } + } + if failedEvictions { + time.Sleep(n.podEvictionWaitPeriod) + } } } - return pending, nil } func cordonStatus(desired bool) string { @@ -235,3 +279,12 @@ func cordonStatus(desired bool) string { } return "uncordon" } + +func isEvictionErrorRecoverable(err error) bool { + for _, recoverableError := range recoverablePodEvictionErrors { + if strings.Contains(err.Error(), recoverableError) { + return true + } + } + return false +} diff --git a/pkg/drain/nodegroup_test.go b/pkg/drain/nodegroup_test.go index 32520ede9e..42863a6154 100644 --- a/pkg/drain/nodegroup_test.go +++ b/pkg/drain/nodegroup_test.go @@ -2,6 +2,7 @@ package drain_test import ( "context" + "errors" "fmt" "time" @@ -16,6 +17,7 @@ import ( . "github.com/onsi/gomega" .
"github.com/onsi/ginkgo" + "github.com/weaveworks/eksctl/pkg/drain" "github.com/weaveworks/eksctl/pkg/eks/mocks" "k8s.io/client-go/kubernetes/fake" @@ -73,7 +75,7 @@ var _ = Describe("Drain", func() { }) It("does not error", func() { - nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second*10, time.Second*10, time.Second, false, false, 1) + nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second*10, time.Second*10, time.Second, time.Second*0, false, false, 1) nodeGroupDrainer.SetDrainer(fakeEvictor) err := nodeGroupDrainer.Drain() @@ -117,7 +119,7 @@ var _ = Describe("Drain", func() { }) It("times out and errors", func() { - nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second*2, time.Second*0, time.Second, false, false, 1) + nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second*2, time.Second*0, time.Second, time.Second*0, false, false, 1) nodeGroupDrainer.SetDrainer(fakeEvictor) err := nodeGroupDrainer.Drain() @@ -131,7 +133,7 @@ var _ = Describe("Drain", func() { }) It("errors", func() { - nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second, time.Second, time.Second, false, false, 1) + nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second, time.Second, time.Second, time.Second*0, false, false, 1) nodeGroupDrainer.SetDrainer(fakeEvictor) err := nodeGroupDrainer.Drain() @@ -175,7 +177,7 @@ var _ = Describe("Drain", func() { }) It("does not error", func() { - nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second*10, time.Second, time.Second, false, true, 1) + nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second*10, time.Second, time.Second, time.Second*0, false, true, 1) nodeGroupDrainer.SetDrainer(fakeEvictor) err := nodeGroupDrainer.Drain() @@ -205,7 +207,7 @@ var _ = Describe("Drain", func() { }) It("uncordons all the nodes", func() { - nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second*10, time.Second, time.Second, true, false, 1) + nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second*10, time.Second, time.Second, time.Second*0, true, false, 1) nodeGroupDrainer.SetDrainer(fakeEvictor) err := nodeGroupDrainer.Drain() @@ -219,4 +221,169 @@ var _ = Describe("Drain", func() { Expect(fakeEvictor.EvictOrDeletePodCallCount()).To(BeZero()) }) }) + + When("an eviction fails recoverably", func() { + var pod corev1.Pod + + BeforeEach(func() { + pod = corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-1", + }, + } + + for i := 0; i < 2; i++ { + fakeEvictor.GetPodsForEvictionReturnsOnCall(i, &evictor.PodDeleteList{ + Items: []evictor.PodDelete{ + { + Pod: pod, + Status: evictor.PodDeleteStatus{ + Delete: true, + }, + }, + }, + }, nil) + } + fakeEvictor.GetPodsForEvictionReturnsOnCall(2, nil, nil) + + fakeEvictor.EvictOrDeletePodReturnsOnCall(0, errors.New("Cannot evict pod as it would violate the pod's disruption budget")) + fakeEvictor.EvictOrDeletePodReturnsOnCall(1, nil) + + _, err := fakeClientSet.CoreV1().Nodes().Create(context.TODO(), &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + Spec: corev1.NodeSpec{ + Unschedulable: false, + }, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + }) + + It("does not error", func() { + nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second*10, time.Second, time.Second, 
time.Second*0, false, false, 1) + nodeGroupDrainer.SetDrainer(fakeEvictor) + + err := nodeGroupDrainer.Drain() + Expect(err).NotTo(HaveOccurred()) + + Expect(fakeEvictor.GetPodsForEvictionCallCount()).To(Equal(3)) + Expect(fakeEvictor.EvictOrDeletePodCallCount()).To(Equal(2)) + Expect(fakeEvictor.EvictOrDeletePodArgsForCall(0)).To(Equal(pod)) + Expect(fakeEvictor.EvictOrDeletePodArgsForCall(1)).To(Equal(pod)) + }) + }) + + When("an eviction fails irrecoverably", func() { + var pod corev1.Pod + var evictionError error + + BeforeEach(func() { + pod = corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-1", + Namespace: "ns-1", + }, + } + + fakeEvictor.GetPodsForEvictionReturns(&evictor.PodDeleteList{ + Items: []evictor.PodDelete{ + { + Pod: pod, + Status: evictor.PodDeleteStatus{ + Delete: true, + }, + }, + }, + }, nil) + + evictionError = errors.New("error1") + fakeEvictor.EvictOrDeletePodReturns(evictionError) + + _, err := fakeClientSet.CoreV1().Nodes().Create(context.TODO(), &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + Spec: corev1.NodeSpec{ + Unschedulable: false, + }, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + }) + + It("returns an error", func() { + nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second*10, time.Second, time.Second, time.Second*0, false, false, 1) + nodeGroupDrainer.SetDrainer(fakeEvictor) + + err := nodeGroupDrainer.Drain() + Expect(err.Error()).To(ContainSubstring("unrecoverable error evicting pod: ns-1/pod-1")) + + Expect(fakeEvictor.GetPodsForEvictionCallCount()).To(Equal(1)) + Expect(fakeEvictor.EvictOrDeletePodCallCount()).To(Equal(1)) + Expect(fakeEvictor.EvictOrDeletePodArgsForCall(0)).To(Equal(pod)) + }) + }) + + When("eviction fails recoverably with multiple pods", func() { + var pods []corev1.Pod + var evictionError error + + BeforeEach(func() { + pods = []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-1", + Namespace: "ns-1", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-2", + Namespace: "ns-2", + }, + }, + } + + fakeEvictor.GetPodsForEvictionReturns(&evictor.PodDeleteList{ + Items: []evictor.PodDelete{ + { + Pod: pods[0], + Status: evictor.PodDeleteStatus{ + Delete: true, + }, + }, + { + Pod: pods[1], + Status: evictor.PodDeleteStatus{ + Delete: true, + }, + }, + }, + }, nil) + + evictionError = errors.New("Cannot evict pod as it would violate the pod's disruption budget") + fakeEvictor.EvictOrDeletePodReturns(evictionError) + + _, err := fakeClientSet.CoreV1().Nodes().Create(context.TODO(), &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + Spec: corev1.NodeSpec{ + Unschedulable: false, + }, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + }) + + It("it attempts to drain all pods", func() { + nodeGroupDrainer := drain.NewNodeGroupDrainer(fakeClientSet, &mockNG, time.Second*10, time.Second, time.Second, time.Second*0, false, false, 1) + nodeGroupDrainer.SetDrainer(fakeEvictor) + + _ = nodeGroupDrainer.Drain() + + Expect(fakeEvictor.EvictOrDeletePodCallCount()).To(BeNumerically(">=", 2)) + Expect(fakeEvictor.EvictOrDeletePodArgsForCall(0)).To(Equal(pods[0])) + Expect(fakeEvictor.EvictOrDeletePodArgsForCall(1)).To(Equal(pods[1])) + }) + }) })
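
For illustration only, a minimal sketch (not part of this diff) of how a caller could pass the new PodEvictionWaitPeriod through DrainInput; it assumes cfg, ctl, clientSet, and allNodeGroups are already initialized as in doDrainNodeGroup above, and the values mirror the flag defaults introduced here (10m, 10s, parallel 1).

drainInput := &nodegroup.DrainInput{
	NodeGroups:            allNodeGroups,
	MaxGracePeriod:        10 * time.Minute, // --max-grace-period default
	PodEvictionWaitPeriod: 10 * time.Second, // new --pod-eviction-wait-period default
	Parallel:              1,                // --parallel default
}
if err := nodegroup.New(cfg, ctl, clientSet).Drain(drainInput); err != nil {
	return err
}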