Handle pod eviction errors correctly

Currently any eviction error causes the draining of a node to stop and a new
node to start draining. Eviction errors are common, expected occurrences,
especially when PodDisruptionBudgets (PDBs) are used in the cluster.
Aborting the drain of a node on any error slows down the entire node-draining
process, as many of the pods further down the list could happily be drained.

This change separates recoverable from unrecoverable eviction errors and
retries only the recoverable ones. Unrecoverable errors fail the entire
command.

An important aspect of this change is that the `evictPods` function becomes
blocking until a node is drained or the process times out. This is required
because the current implementation begins draining another node on the first
eviction error. We would rather keep retrying and eventually time out than
make a bad situation worse by draining a new node.
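
Conceptually, the retry loop described above could look something like the
following sketch. This is a simplified illustration, not the exact code in
pkg/drain: the evict callback, the timeout handling and the use of a 429
TooManyRequests response to detect PDB-blocked evictions are assumptions about
how the evictor surfaces errors.

package drain

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
)

// evictPods blocks until every pod on a node has been evicted or the timeout
// expires. PDB-blocked evictions (HTTP 429) are recoverable and retried after
// waitPeriod; any other error is unrecoverable and fails the whole drain.
func evictPods(pods []corev1.Pod, evict func(corev1.Pod) error, waitPeriod, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	pending := pods
	for len(pending) > 0 {
		if time.Now().After(deadline) {
			return fmt.Errorf("timed out waiting to evict %d pod(s)", len(pending))
		}
		var retry []corev1.Pod
		for _, pod := range pending {
			err := evict(pod)
			switch {
			case err == nil, apierrors.IsNotFound(err):
				// evicted, or already gone
			case apierrors.IsTooManyRequests(err):
				// recoverable: a PodDisruptionBudget is currently blocking this eviction
				retry = append(retry, pod)
			default:
				// unrecoverable: fail the entire command
				return fmt.Errorf("evicting pod %s/%s: %w", pod.Namespace, pod.Name, err)
			}
		}
		pending = retry
		if len(pending) > 0 {
			time.Sleep(waitPeriod) // the new pod eviction wait period
		}
	}
	return nil
}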
hintofbasil committed Apr 12, 2022
1 parent 506e8da commit 20ac5db
Showing 8 changed files with 326 additions and 98 deletions.
17 changes: 9 additions & 8 deletions pkg/actions/nodegroup/drain.go
@@ -9,19 +9,20 @@ import (
)

type DrainInput struct {
NodeGroups []eks.KubeNodeGroup
Plan bool
MaxGracePeriod time.Duration
NodeDrainWaitPeriod time.Duration
Undo bool
DisableEviction bool
Parallel int
NodeGroups []eks.KubeNodeGroup
Plan bool
MaxGracePeriod time.Duration
NodeDrainWaitPeriod time.Duration
PodEvictionWaitPeriod time.Duration
Undo bool
DisableEviction bool
Parallel int
}

func (m *Manager) Drain(input *DrainInput) error {
if !input.Plan {
for _, n := range input.NodeGroups {
nodeGroupDrainer := drain.NewNodeGroupDrainer(m.clientSet, n, m.ctl.Provider.WaitTimeout(), input.MaxGracePeriod, input.NodeDrainWaitPeriod, input.Undo, input.DisableEviction, input.Parallel)
nodeGroupDrainer := drain.NewNodeGroupDrainer(m.clientSet, n, m.ctl.Provider.WaitTimeout(), input.MaxGracePeriod, input.NodeDrainWaitPeriod, input.PodEvictionWaitPeriod, input.Undo, input.DisableEviction, input.Parallel)
if err := nodeGroupDrainer.Drain(); err != nil {
return err
}
36 changes: 20 additions & 16 deletions pkg/ctl/delete/nodegroup.go
@@ -17,30 +17,31 @@ import (
)

func deleteNodeGroupCmd(cmd *cmdutils.Cmd) {
deleteNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod time.Duration, disableEviction bool, parallel int) error {
return doDeleteNodeGroup(cmd, ng, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing, maxGracePeriod, disableEviction, parallel)
deleteNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error {
return doDeleteNodeGroup(cmd, ng, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing, maxGracePeriod, podEvictionWaitPeriod, disableEviction, parallel)
})
}

func deleteNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod time.Duration, disableEviction bool, parallel int) error) {
func deleteNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error) {
cfg := api.NewClusterConfig()
ng := api.NewNodeGroup()
cmd.ClusterConfig = cfg

var (
updateAuthConfigMap bool
deleteNodeGroupDrain bool
onlyMissing bool
maxGracePeriod time.Duration
disableEviction bool
parallel int
updateAuthConfigMap bool
deleteNodeGroupDrain bool
onlyMissing bool
maxGracePeriod time.Duration
podEvictionWaitPeriod time.Duration
disableEviction bool
parallel int
)

cmd.SetDescription("nodegroup", "Delete a nodegroup", "", "ng")

cmd.CobraCommand.RunE = func(_ *cobra.Command, args []string) error {
cmd.NameArg = cmdutils.GetNameArg(args)
return runFunc(cmd, ng, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing, maxGracePeriod, disableEviction, parallel)
return runFunc(cmd, ng, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing, maxGracePeriod, podEvictionWaitPeriod, disableEviction, parallel)
}

cmd.FlagSetGroup.InFlagSet("General", func(fs *pflag.FlagSet) {
@@ -55,6 +56,8 @@ func deleteNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cm
fs.BoolVar(&deleteNodeGroupDrain, "drain", true, "Drain and cordon all nodes in the nodegroup before deletion")
defaultMaxGracePeriod, _ := time.ParseDuration("10m")
fs.DurationVar(&maxGracePeriod, "max-grace-period", defaultMaxGracePeriod, "Maximum pods termination grace period")
defaultPodEvictionWaitPeriod, _ := time.ParseDuration("10s")
fs.DurationVar(&podEvictionWaitPeriod, "pod-eviction-wait-period", defaultPodEvictionWaitPeriod, "Duration to wait after failing to evict a pod")
defaultDisableEviction := false
fs.BoolVar(&disableEviction, "disable-eviction", defaultDisableEviction, "Force drain to use delete, even if eviction is supported. This will bypass checking PodDisruptionBudgets, use with caution.")
fs.IntVar(&parallel, "parallel", 1, "Number of nodes to drain in parallel. Max 25")
@@ -67,7 +70,7 @@ func deleteNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cm
cmdutils.AddCommonFlagsForAWS(cmd.FlagSetGroup, &cmd.ProviderConfig, true)
}

func doDeleteNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod time.Duration, disableEviction bool, parallel int) error {
func doDeleteNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod time.Duration, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error {
ngFilter := filter.NewNodeGroupFilter()

if err := cmdutils.NewDeleteAndDrainNodeGroupLoader(cmd, ng, ngFilter).Load(); err != nil {
@@ -128,11 +131,12 @@ func doDeleteNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, updateAuthConfigMap
cmdutils.LogIntendedAction(cmd.Plan, "drain %d nodegroup(s) in cluster %q", len(allNodeGroups), cfg.Metadata.Name)

drainInput := &nodegroup.DrainInput{
NodeGroups: allNodeGroups,
Plan: cmd.Plan,
MaxGracePeriod: maxGracePeriod,
DisableEviction: disableEviction,
Parallel: parallel,
NodeGroups: allNodeGroups,
Plan: cmd.Plan,
MaxGracePeriod: maxGracePeriod,
PodEvictionWaitPeriod: podEvictionWaitPeriod,
DisableEviction: disableEviction,
Parallel: parallel,
}
err := nodeGroupManager.Drain(drainInput)
if err != nil {
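
With the new flag wired into `eksctl delete nodegroup`, a delete that drains
first could, for example, be invoked roughly like this (a hypothetical
invocation; cluster and nodegroup names are placeholders):

eksctl delete nodegroup --cluster my-cluster --name ng-1 --pod-eviction-wait-period 30s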
2 changes: 1 addition & 1 deletion pkg/ctl/delete/nodegroup_test.go
@@ -22,7 +22,7 @@ var _ = Describe("delete", func() {
cmd := newMockEmptyCmd(args...)
count := 0
cmdutils.AddResourceCmd(cmdutils.NewGrouping(), cmd.parentCmd, func(cmd *cmdutils.Cmd) {
deleteNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *v1alpha5.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod time.Duration, disableEviction bool, parallel int) error {
deleteNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *v1alpha5.NodeGroup, updateAuthConfigMap, deleteNodeGroupDrain, onlyMissing bool, maxGracePeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error {
Expect(cmd.ClusterConfig.Metadata.Name).To(Equal("clusterName"))
Expect(ng.Name).To(Equal("ng"))
count++
40 changes: 22 additions & 18 deletions pkg/ctl/drain/nodegroup.go
@@ -15,30 +15,31 @@ import (
)

func drainNodeGroupCmd(cmd *cmdutils.Cmd) {
drainNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod time.Duration, disableEviction bool, parallel int) error {
return doDrainNodeGroup(cmd, ng, undo, onlyMissing, maxGracePeriod, nodeDrainWaitPeriod, disableEviction, parallel)
drainNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error {
return doDrainNodeGroup(cmd, ng, undo, onlyMissing, maxGracePeriod, nodeDrainWaitPeriod, podEvictionWaitPeriod, disableEviction, parallel)
})
}

func drainNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod time.Duration, disableEviction bool, parallel int) error) {
func drainNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error) {
cfg := api.NewClusterConfig()
ng := api.NewNodeGroup()
cmd.ClusterConfig = cfg

var (
undo bool
onlyMissing bool
disableEviction bool
parallel int
maxGracePeriod time.Duration
nodeDrainWaitPeriod time.Duration
undo bool
onlyMissing bool
disableEviction bool
parallel int
maxGracePeriod time.Duration
nodeDrainWaitPeriod time.Duration
podEvictionWaitPeriod time.Duration
)

cmd.SetDescription("nodegroup", "Cordon and drain a nodegroup", "", "ng")

cmd.CobraCommand.RunE = func(_ *cobra.Command, args []string) error {
cmd.NameArg = cmdutils.GetNameArg(args)
return runFunc(cmd, ng, undo, onlyMissing, maxGracePeriod, nodeDrainWaitPeriod, disableEviction, parallel)
return runFunc(cmd, ng, undo, onlyMissing, maxGracePeriod, nodeDrainWaitPeriod, podEvictionWaitPeriod, disableEviction, parallel)
}

cmd.FlagSetGroup.InFlagSet("General", func(fs *pflag.FlagSet) {
@@ -52,6 +53,8 @@ func drainNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd
fs.BoolVar(&undo, "undo", false, "Uncordon the nodegroup")
defaultMaxGracePeriod, _ := time.ParseDuration("10m")
fs.DurationVar(&maxGracePeriod, "max-grace-period", defaultMaxGracePeriod, "Maximum pods termination grace period")
defaultPodEvictionWaitPeriod, _ := time.ParseDuration("10s")
fs.DurationVar(&podEvictionWaitPeriod, "pod-eviction-wait-period", defaultPodEvictionWaitPeriod, "Duration to wait after failing to evict a pod")
defaultDisableEviction := false
fs.BoolVar(&disableEviction, "disable-eviction", defaultDisableEviction, "Force drain to use delete, even if eviction is supported. This will bypass checking PodDisruptionBudgets, use with caution.")
cmdutils.AddTimeoutFlag(fs, &cmd.ProviderConfig.WaitTimeout)
@@ -62,7 +65,7 @@ func drainNodeGroupWithRunFunc(cmd *cmdutils.Cmd, runFunc func(cmd *cmdutils.Cmd
cmdutils.AddCommonFlagsForAWS(cmd.FlagSetGroup, &cmd.ProviderConfig, true)
}

func doDrainNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod time.Duration, disableEviction bool, parallel int) error {
func doDrainNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod time.Duration, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error {
ngFilter := filter.NewNodeGroupFilter()

if err := cmdutils.NewDeleteAndDrainNodeGroupLoader(cmd, ng, ngFilter).Load(); err != nil {
@@ -125,13 +128,14 @@ func doDrainNodeGroup(cmd *cmdutils.Cmd, ng *api.NodeGroup, undo, onlyMissing bo
allNodeGroups := cmdutils.ToKubeNodeGroups(cfg)

drainInput := &nodegroup.DrainInput{
NodeGroups: allNodeGroups,
Plan: cmd.Plan,
MaxGracePeriod: maxGracePeriod,
NodeDrainWaitPeriod: nodeDrainWaitPeriod,
Undo: undo,
DisableEviction: disableEviction,
Parallel: parallel,
NodeGroups: allNodeGroups,
Plan: cmd.Plan,
MaxGracePeriod: maxGracePeriod,
NodeDrainWaitPeriod: nodeDrainWaitPeriod,
PodEvictionWaitPeriod: podEvictionWaitPeriod,
Undo: undo,
DisableEviction: disableEviction,
Parallel: parallel,
}
return nodegroup.New(cfg, ctl, clientSet).Drain(drainInput)
}
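
Likewise for a standalone drain, something along these lines (again a
hypothetical invocation with placeholder names):

eksctl drain nodegroup --cluster my-cluster --name ng-1 --pod-eviction-wait-period 30s --max-grace-period 5m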
2 changes: 1 addition & 1 deletion pkg/ctl/drain/nodegroup_test.go
@@ -24,7 +24,7 @@ var _ = Describe("drain node group", func() {
cmd := newMockEmptyCmd(args...)
count := 0
cmdutils.AddResourceCmd(cmdutils.NewGrouping(), cmd.parentCmd, func(cmd *cmdutils.Cmd) {
drainNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *v1alpha5.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod time.Duration, disableEviction bool, parallel int) error {
drainNodeGroupWithRunFunc(cmd, func(cmd *cmdutils.Cmd, ng *v1alpha5.NodeGroup, undo, onlyMissing bool, maxGracePeriod, nodeDrainWaitPeriod, podEvictionWaitPeriod time.Duration, disableEviction bool, parallel int) error {
Expect(cmd.ClusterConfig.Metadata.Name).To(Equal("clusterName"))
Expect(ng.Name).To(Equal("ng"))
count++
5 changes: 2 additions & 3 deletions pkg/drain/evictor/pod.go
@@ -29,7 +29,6 @@ import (

const (
daemonSetFatal = "DaemonSet-managed Pods (use --ignore-daemonsets to ignore)"
daemonSetWarning = "ignoring DaemonSet-managed Pods"
localStorageFatal = "Pods with local storage (use --delete-local-data to override)"
localStorageWarning = "deleting Pods with local storage"
unmanagedFatal = "Pods not managed by ReplicationController, ReplicaSet, Job, DaemonSet or StatefulSet (use --force to override)"
@@ -204,7 +203,7 @@ func (d *Evictor) daemonSetFilter(pod corev1.Pod) PodDeleteStatus {
if controllerRef.Name == ignoreDaemonSet.Name {
switch ignoreDaemonSet.Namespace {
case pod.Namespace, metav1.NamespaceAll:
return makePodDeleteStatusWithWarning(false, daemonSetWarning)
return makePodDeleteStatusSkip()
}
}
}
@@ -213,7 +212,7 @@ func (d *Evictor) daemonSetFilter(pod corev1.Pod) PodDeleteStatus {
return makePodDeleteStatusWithError(daemonSetFatal)
}

return makePodDeleteStatusWithWarning(false, daemonSetWarning)
return makePodDeleteStatusSkip()
}

func (d *Evictor) mirrorPodFilter(pod corev1.Pod) PodDeleteStatus {
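
For context, the status helpers used above appear to follow the shape of
kubectl's drain package. A rough sketch (field and constant names are
assumptions, not the exact eksctl types) of why returning a skip status
instead of a warning removes the per-pod DaemonSet noise:

// Sketch only: assumed shape of the pod delete status helpers.
type PodDeleteStatus struct {
	Delete  bool   // should the drainer evict/delete this pod?
	Reason  string // e.g. "Skip", "Warning", "Error"
	Message string // surfaced to the user for warnings and errors
}

func makePodDeleteStatusSkip() PodDeleteStatus {
	// Skip: the pod is left alone and nothing is logged, which is the desired
	// behaviour for DaemonSet-managed pods on every drain pass.
	return PodDeleteStatus{Delete: false, Reason: "Skip"}
}

func makePodDeleteStatusWithWarning(del bool, message string) PodDeleteStatus {
	// Warning: the pod is still handled according to del, but the message is
	// reported to the user each time the filter runs.
	return PodDeleteStatus{Delete: del, Reason: "Warning", Message: message}
}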
