add option to keep node group backoff on OutOfResource error
wllbo committed May 12, 2023
1 parent ee59c74 · commit 70e1991
Showing 6 changed files with 40 additions and 19 deletions.
8 changes: 7 additions & 1 deletion cluster-autoscaler/clusterstate/clusterstate.go
@@ -81,6 +81,8 @@ type ClusterStateRegistryConfig struct {
	// Minimum number of nodes that must be unready for MaxTotalUnreadyPercentage to apply.
	// This is to ensure that in very small clusters (e.g. 2 nodes) a single node's failure doesn't disable autoscaling.
	OkTotalUnreadyCount int
	// NodeGroupKeepBackoffOutOfResources is whether a node group's backoff should be kept until it expires, instead of being removed after a successful scale-up, when the backoff was caused by the cloud provider being out of resources.
	NodeGroupKeepBackoffOutOfResources bool
}

// IncorrectNodeGroupSize contains information about how much the current size of the node group
@@ -254,7 +256,11 @@ func (csr *ClusterStateRegistry) updateScaleRequests(currentTime time.Time) {
			// scale-out finished successfully
			// remove it and reset node group backoff
			delete(csr.scaleUpRequests, nodeGroupName)
			shouldKeepBackoff := csr.config.NodeGroupKeepBackoffOutOfResources && csr.backoff.IsNodeGroupOutOfResources(scaleUpRequest.NodeGroup)
			if !shouldKeepBackoff {
				klog.V(4).Infof("Removing backoff for node group %v", scaleUpRequest.NodeGroup.Id())
				csr.backoff.RemoveBackoff(scaleUpRequest.NodeGroup, csr.nodeInfosForGroups[scaleUpRequest.NodeGroup.Id()])
			}
			klog.V(4).Infof("Scale up in group %v finished successfully in %v",
				nodeGroupName, currentTime.Sub(scaleUpRequest.Time))
			continue
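Distilled to its core, the new check is a simple conjunction: a backoff survives a successful scale-up only when the option is enabled and the node group's backoff came from an out-of-resources error. A minimal, runnable sketch of just that decision (shouldKeepBackoff and its two boolean parameters are illustrative stand-ins for the config field and the backoff query, not code from this commit):

package main

import "fmt"

// shouldKeepBackoff mirrors the condition added to updateScaleRequests:
// keep the backoff only if the option is on AND the node group's backoff
// was caused by an out-of-resources error.
func shouldKeepBackoff(keepOnOutOfResources, backoffIsOutOfResources bool) bool {
	return keepOnOutOfResources && backoffIsOutOfResources
}

func main() {
	// Walk the full truth table of the decision.
	for _, c := range []struct{ option, outOfResources bool }{
		{false, false}, {false, true}, {true, false}, {true, true},
	} {
		fmt.Printf("option=%-5v outOfResources=%-5v -> keep backoff: %v\n",
			c.option, c.outOfResources, shouldKeepBackoff(c.option, c.outOfResources))
	}
}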
2 changes: 2 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
@@ -237,6 +237,8 @@ type AutoscalingOptions struct {
	MaxNodeGroupBackoffDuration time.Duration
	// NodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.
	NodeGroupBackoffResetTimeout time.Duration
	// NodeGroupKeepBackoffOutOfResources is whether a node group's backoff should be kept until it expires, instead of being removed after a successful scale-up, when the backoff was caused by the cloud provider being out of resources.
	NodeGroupKeepBackoffOutOfResources bool
	// MaxScaleDownParallelism is the maximum number of nodes (both empty and needing drain) that can be deleted in parallel.
	MaxScaleDownParallelism int
	// MaxDrainParallelism is the maximum number of nodes needing drain, that can be drained and deleted in parallel.
5 changes: 3 additions & 2 deletions cluster-autoscaler/core/static_autoscaler.go
@@ -156,8 +156,9 @@ func NewStaticAutoscaler(
remainingPdbTracker)

	clusterStateConfig := clusterstate.ClusterStateRegistryConfig{
		MaxTotalUnreadyPercentage:          opts.MaxTotalUnreadyPercentage,
		OkTotalUnreadyCount:                opts.OkTotalUnreadyCount,
		NodeGroupKeepBackoffOutOfResources: opts.NodeGroupKeepBackoffOutOfResources,
	}

taintConfig := taints.NewTaintConfig(opts)
34 changes: 18 additions & 16 deletions cluster-autoscaler/main.go
@@ -214,22 +214,23 @@ var (
"maxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.")
nodeGroupBackoffResetTimeout = flag.Duration("node-group-backoff-reset-timeout", 3*time.Hour,
"nodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.")
	nodeGroupKeepBackoffOutOfResources = flag.Bool("node-group-keep-backoff-out-of-resources", false, "Prevents removal of backoff before expiration when a scale-up fails due to the cloud provider being out of resources.")
	maxScaleDownParallelismFlag        = flag.Int("max-scale-down-parallelism", 10, "Maximum number of nodes (both empty and needing drain) that can be deleted in parallel.")
	maxDrainParallelismFlag            = flag.Int("max-drain-parallelism", 1, "Maximum number of nodes needing drain, that can be drained and deleted in parallel.")
	recordDuplicatedEvents             = flag.Bool("record-duplicated-events", false, "enable duplication of similar events within a 5 minute window.")
	maxNodesPerScaleUp                 = flag.Int("max-nodes-per-scaleup", 1000, "Max nodes added in a single scale-up. This is intended strictly for optimizing CA algorithm latency and not a tool to rate-limit scale-up throughput.")
	maxNodeGroupBinpackingDuration     = flag.Duration("max-nodegroup-binpacking-duration", 10*time.Second, "Maximum time that will be spent in binpacking simulation for each NodeGroup.")
	skipNodesWithSystemPods            = flag.Bool("skip-nodes-with-system-pods", true, "If true cluster autoscaler will never delete nodes with pods from kube-system (except for DaemonSet or mirror pods)")
	skipNodesWithLocalStorage          = flag.Bool("skip-nodes-with-local-storage", true, "If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath")
	skipNodesWithCustomControllerPods  = flag.Bool("skip-nodes-with-custom-controller-pods", true, "If true cluster autoscaler will never delete nodes with pods owned by custom controllers")
	minReplicaCount                    = flag.Int("min-replica-count", 0, "Minimum number or replicas that a replica set or replication controller should have to allow their pods deletion in scale down")
	nodeDeleteDelayAfterTaint          = flag.Duration("node-delete-delay-after-taint", 5*time.Second, "How long to wait before deleting a node after tainting it")
	scaleDownSimulationTimeout         = flag.Duration("scale-down-simulation-timeout", 30*time.Second, "How long should we run scale down simulation.")
	parallelDrain                      = flag.Bool("parallel-drain", false, "Whether to allow parallel drain of nodes.")
	maxCapacityMemoryDifferenceRatio   = flag.Float64("memory-difference-ratio", config.DefaultMaxCapacityMemoryDifferenceRatio, "Maximum difference in memory capacity between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's memory capacity.")
	maxFreeDifferenceRatio             = flag.Float64("max-free-difference-ratio", config.DefaultMaxFreeDifferenceRatio, "Maximum difference in free resources between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's free resource.")
	maxAllocatableDifferenceRatio      = flag.Float64("max-allocatable-difference-ratio", config.DefaultMaxAllocatableDifferenceRatio, "Maximum difference in allocatable resources between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's allocatable resource.")
	forceDaemonSets                    = flag.Bool("force-ds", false, "Blocks scale-up of node groups too small for all suitable Daemon Sets pods.")
)

func createAutoscalingOptions() config.AutoscalingOptions {
@@ -326,6 +327,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
		InitialNodeGroupBackoffDuration:    *initialNodeGroupBackoffDuration,
		MaxNodeGroupBackoffDuration:        *maxNodeGroupBackoffDuration,
		NodeGroupBackoffResetTimeout:       *nodeGroupBackoffResetTimeout,
		NodeGroupKeepBackoffOutOfResources: *nodeGroupKeepBackoffOutOfResources,
		MaxScaleDownParallelism:            *maxScaleDownParallelismFlag,
		MaxDrainParallelism:                *maxDrainParallelismFlag,
		RecordDuplicatedEvents:             *recordDuplicatedEvents,
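The flag defaults to false, so existing deployments keep today's remove-backoff-on-success behavior unless they opt in. A self-contained sketch of just this flag, using only the standard flag package (the real binary defines it inside the large var block above and copies the parsed value into AutoscalingOptions):

package main

import (
	"flag"
	"fmt"
)

// Same name, default, and help text as the flag added in this commit.
var nodeGroupKeepBackoffOutOfResources = flag.Bool(
	"node-group-keep-backoff-out-of-resources", false,
	"Prevents removal of backoff before expiration when a scale-up fails due to the cloud provider being out of resources.")

func main() {
	flag.Parse() // e.g. run with --node-group-keep-backoff-out-of-resources=true
	fmt.Println("keep backoff on out-of-resources:", *nodeGroupKeepBackoffOutOfResources)
}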
2 changes: 2 additions & 0 deletions cluster-autoscaler/utils/backoff/backoff.go
@@ -33,4 +33,6 @@ type Backoff interface {
	RemoveBackoff(nodeGroup cloudprovider.NodeGroup, nodeInfo *schedulerframework.NodeInfo)
	// RemoveStaleBackoffData removes stale backoff data.
	RemoveStaleBackoffData(currentTime time.Time)
	// IsNodeGroupOutOfResources returns true if the given node group is currently backed off because the cloud provider ran out of resources.
	IsNodeGroupOutOfResources(nodeGroup cloudprovider.NodeGroup) bool
}
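Every Backoff implementation now has to answer this query. A hypothetical caller-side helper showing how the two interface methods combine, mirroring the clusterstate change above (maybeClearBackoff and the package name are illustrative, not part of this commit; import paths assume the autoscaler repo layout):

package example // hypothetical package, not part of the commit

import (
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
	"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)

// maybeClearBackoff clears a node group's backoff after a successful
// scale-up, unless the keep-on-out-of-resources option is set and the
// backoff was caused by an out-of-resources error, in which case the
// backoff is left to expire on its own.
func maybeClearBackoff(b backoff.Backoff, ng cloudprovider.NodeGroup, nodeInfo *schedulerframework.NodeInfo, keepOnOutOfResources bool) {
	if keepOnOutOfResources && b.IsNodeGroupOutOfResources(ng) {
		return // keep the backoff until it expires naturally
	}
	b.RemoveBackoff(ng, nodeInfo)
}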
8 changes: 8 additions & 0 deletions cluster-autoscaler/utils/backoff/exponential_backoff.go
@@ -37,6 +37,7 @@ type exponentialBackoffInfo struct {
	duration            time.Duration
	backoffUntil        time.Time
	lastFailedExecution time.Time
	errorClass          cloudprovider.InstanceErrorClass
}

// NewExponentialBackoff creates an instance of exponential backoff.
@@ -87,6 +88,7 @@ func (b *exponentialBackoff) Backoff(nodeGroup cloudprovider.NodeGroup, nodeInfo
		duration:            duration,
		backoffUntil:        backoffUntil,
		lastFailedExecution: currentTime,
		errorClass:          errorClass,
	}
	return backoffUntil
}
@@ -110,3 +112,9 @@ func (b *exponentialBackoff) RemoveStaleBackoffData(currentTime time.Time) {
		}
	}
}

// IsNodeGroupOutOfResources returns true if the given node group is currently backed off because the cloud provider ran out of resources.
func (b *exponentialBackoff) IsNodeGroupOutOfResources(nodeGroup cloudprovider.NodeGroup) bool {
	backoffInfo, found := b.backoffInfo[b.nodeGroupKey(nodeGroup)]
	return found && backoffInfo.errorClass == cloudprovider.OutOfResourcesErrorClass
}
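End to end, the bookkeeping is: record the error class when a backoff is created, report it through the new query, and drop it along with the rest of the backoff data. A miniature self-contained model of that flow (miniBackoff and its plain string keys are illustrative simplifications of exponentialBackoff and its node-group keys, not the real types):

package main

import (
	"fmt"
	"time"
)

type errorClass int

const (
	otherErrorClass errorClass = iota
	outOfResourcesErrorClass
)

type backoffInfo struct {
	backoffUntil time.Time
	errorClass   errorClass
}

type miniBackoff struct{ info map[string]backoffInfo }

// backoff records a backoff for a group, remembering why it happened.
func (b *miniBackoff) backoff(group string, class errorClass, now time.Time) {
	b.info[group] = backoffInfo{backoffUntil: now.Add(5 * time.Minute), errorClass: class}
}

// removeBackoff drops all backoff data for a group.
func (b *miniBackoff) removeBackoff(group string) {
	delete(b.info, group)
}

// isOutOfResources mirrors IsNodeGroupOutOfResources: true only when a
// backoff exists and was caused by an out-of-resources error.
func (b *miniBackoff) isOutOfResources(group string) bool {
	info, found := b.info[group]
	return found && info.errorClass == outOfResourcesErrorClass
}

func main() {
	b := &miniBackoff{info: map[string]backoffInfo{}}
	now := time.Now()
	b.backoff("ng-1", outOfResourcesErrorClass, now)
	b.backoff("ng-2", otherErrorClass, now)
	fmt.Println(b.isOutOfResources("ng-1")) // true: backed off due to out-of-resources
	fmt.Println(b.isOutOfResources("ng-2")) // false: backed off for another reason
	b.removeBackoff("ng-1")
	fmt.Println(b.isOutOfResources("ng-1")) // false: backoff data removed
}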
