diff --git a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go index 0b2518e7528c..dfdc731678f9 100644 --- a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go +++ b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go @@ -19,6 +19,7 @@ package aws import ( "fmt" "reflect" + "regexp" "strings" "sync" @@ -30,7 +31,10 @@ import ( "k8s.io/klog" ) -const scaleToZeroSupported = true +const ( + scaleToZeroSupported = true + placeholderInstanceNamePrefix = "i-placeholder-" +) type asgCache struct { registeredAsgs []*asg @@ -195,6 +199,10 @@ func (m *asgCache) SetAsgSize(asg *asg, size int) error { m.mutex.Lock() defer m.mutex.Unlock() + return m.setAsgSizeNoLock(asg, size) +} + +func (m *asgCache) setAsgSizeNoLock(asg *asg, size int) error { params := &autoscaling.SetDesiredCapacityInput{ AutoScalingGroupName: aws.String(asg.Name), DesiredCapacity: aws.Int64(int64(size)), @@ -212,6 +220,10 @@ func (m *asgCache) SetAsgSize(asg *asg, size int) error { return nil } +func (m *asgCache) decreaseAsgSizeByOneNoLock(asg *asg) error { + return m.setAsgSizeNoLock(asg, asg.curSize-1) +} + // DeleteInstances deletes the given instances. All instances must be controlled by the same ASG. func (m *asgCache) DeleteInstances(instances []*AwsInstanceRef) error { m.mutex.Lock() @@ -238,22 +250,37 @@ func (m *asgCache) DeleteInstances(instances []*AwsInstanceRef) error { } } + wasPlaceholderDeleted := false for _, instance := range instances { - params := &autoscaling.TerminateInstanceInAutoScalingGroupInput{ - InstanceId: aws.String(instance.Name), - ShouldDecrementDesiredCapacity: aws.Bool(true), - } - resp, err := m.service.TerminateInstanceInAutoScalingGroup(params) - if err != nil { - return err + // check if the instance is a placeholder - a requested instance that was never created by the node group + // if it is, just decrease the size of the node group, as there's no specific instance we can remove + matched, err := regexp.MatchString(fmt.Sprintf("^%s\\d+$", placeholderInstanceNamePrefix), instance.Name) + if err == nil && matched { + klog.V(4).Infof("instance %s is detected as a placeholder, decreasing ASG requested size instead "+ + "of deleting instance", instance.Name) + m.decreaseAsgSizeByOneNoLock(commonAsg) + wasPlaceholderDeleted = true + } else { + params := &autoscaling.TerminateInstanceInAutoScalingGroupInput{ + InstanceId: aws.String(instance.Name), + ShouldDecrementDesiredCapacity: aws.Bool(true), + } + resp, err := m.service.TerminateInstanceInAutoScalingGroup(params) + if err != nil { + return err + } + klog.V(4).Infof(*resp.Activity.Description) } // Proactively decrement the size so autoscaler makes better decisions commonAsg.curSize-- - - klog.V(4).Infof(*resp.Activity.Description) } + if wasPlaceholderDeleted { + return &cloudprovider.PlaceholderDeleteError{ + NodeGroupId: commonAsg.Name, + } + } return nil } @@ -323,6 +350,11 @@ func (m *asgCache) regenerate() error { return err } + // If currently any ASG has more Desired than running Instances, introduce placeholders + // for the instances to come up. This is required to track Desired instances that + // will never come up, like with Spot Request that can't be fulfilled + groups = m.createPlaceholdersForDesiredNonStartedInstances(groups) + // Register or update ASGs exists := make(map[AwsRef]bool) for _, group := range groups { @@ -355,6 +387,27 @@ func (m *asgCache) regenerate() error { return nil } +func (m *asgCache) createPlaceholdersForDesiredNonStartedInstances(groups []*autoscaling.Group) []*autoscaling.Group { + for _, g := range groups { + desired := *g.DesiredCapacity + real := int64(len(g.Instances)) + if desired <= real { + continue + } + + for i := real; i < desired; i++ { + id := fmt.Sprintf("%s%d", placeholderInstanceNamePrefix, i) + klog.V(4).Infof("Instance group %s has only %d instances created while requested count is %d."+ + "Creating placeholder instance with ID %s", *g.AutoScalingGroupName, real, desired, id) + g.Instances = append(g.Instances, &autoscaling.Instance{ + InstanceId: &id, + AvailabilityZone: g.AvailabilityZones[0], + }) + } + } + return groups +} + func (m *asgCache) buildAsgFromAWS(g *autoscaling.Group) (*asg, error) { spec := dynamic.NodeGroupSpec{ Name: aws.StringValue(g.AutoScalingGroupName), diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go b/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go index ed8a5f2e84e9..97a051bf60a2 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go @@ -207,9 +207,16 @@ func TestFetchExplicitAsgs(t *testing.T) { mock.AnythingOfType("func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool"), ).Run(func(args mock.Arguments) { fn := args.Get(1).(func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool) + zone := "test-1a" fn(&autoscaling.DescribeAutoScalingGroupsOutput{ AutoScalingGroups: []*autoscaling.Group{ - {AutoScalingGroupName: aws.String(groupname)}, + { + AvailabilityZones: []*string{&zone}, + AutoScalingGroupName: aws.String(groupname), + MinSize: aws.Int64(int64(min)), + MaxSize: aws.Int64(int64(max)), + DesiredCapacity: aws.Int64(int64(min)), + }, }}, false) }).Return(nil) @@ -381,11 +388,14 @@ func TestFetchAutoAsgs(t *testing.T) { mock.AnythingOfType("func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool"), ).Run(func(args mock.Arguments) { fn := args.Get(1).(func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool) + zone := "test-1a" fn(&autoscaling.DescribeAutoScalingGroupsOutput{ AutoScalingGroups: []*autoscaling.Group{{ + AvailabilityZones: []*string{&zone}, AutoScalingGroupName: aws.String(groupname), MinSize: aws.Int64(int64(min)), MaxSize: aws.Int64(int64(max)), + DesiredCapacity: aws.Int64(int64(min)), }}}, false) }).Return(nil).Twice() diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go index f3420cc3e5e9..b6d761fc6ada 100644 --- a/cluster-autoscaler/cloudprovider/cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/cloud_provider.go @@ -17,6 +17,7 @@ limitations under the License. package cloudprovider import ( + "fmt" "time" apiv1 "k8s.io/api/core/v1" @@ -235,3 +236,14 @@ func ContainsGpuResources(resources []string) bool { } return false } + +// PlaceholderDeleteError is returned by delete functions when the delete request was targeting +// a placeholder node information, not a real node. This means that no node was deleted, +// only the related node group was scaled down. +type PlaceholderDeleteError struct { + NodeGroupId string +} + +func (p *PlaceholderDeleteError) Error() string { + return fmt.Sprintf("some of the nodes in %s group were placeholders", p.NodeGroupId) +} diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 49af04ad4d5a..d8e2af9aaf9c 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -227,7 +227,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError unregisteredNodes := a.clusterStateRegistry.GetUnregisteredNodes() if len(unregisteredNodes) > 0 { klog.V(1).Infof("%d unregistered nodes present", len(unregisteredNodes)) - removedAny, err := removeOldUnregisteredNodes(unregisteredNodes, autoscalingContext, currentTime, autoscalingContext.LogRecorder) + removedAny, err := removeOldUnregisteredNodes(unregisteredNodes, autoscalingContext, a.clusterStateRegistry, + currentTime, autoscalingContext.LogRecorder) // There was a problem with removing unregistered nodes. Retry in the next loop. if err != nil { if removedAny { diff --git a/cluster-autoscaler/core/utils.go b/cluster-autoscaler/core/utils.go index 81b366dfd582..da77f75e37a3 100644 --- a/cluster-autoscaler/core/utils.go +++ b/cluster-autoscaler/core/utils.go @@ -386,7 +386,7 @@ func sanitizeTemplateNode(node *apiv1.Node, nodeGroup string) (*apiv1.Node, erro // Removes unregistered nodes if needed. Returns true if anything was removed and error if such occurred. func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNode, context *context.AutoscalingContext, - currentTime time.Time, logRecorder *utils.LogEventRecorder) (bool, error) { + clusterStateRegistry *clusterstate.ClusterStateRegistry, currentTime time.Time, logRecorder *utils.LogEventRecorder) (bool, error) { removedAny := false for _, unregisteredNode := range unregisteredNodes { if unregisteredNode.UnregisteredSince.Add(context.MaxNodeProvisionTime).Before(currentTime) { @@ -411,10 +411,20 @@ func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNod } err = nodeGroup.DeleteNodes([]*apiv1.Node{unregisteredNode.Node}) if err != nil { - klog.Warningf("Failed to remove node %s: %v", unregisteredNode.Node.Name, err) - logRecorder.Eventf(apiv1.EventTypeWarning, "DeleteUnregisteredFailed", - "Failed to remove node %s: %v", unregisteredNode.Node.Name, err) - return removedAny, err + _, wasPlaceholder := err.(*cloudprovider.PlaceholderDeleteError) + // this means only a placeholder instance was deleted - it is an instance, that was requested, + // but was not create before StartUpTimeout. It means something's wrong with this specific + // node group and we temporarily suspend requesting new instances from it by registering + // a failed scale up + if wasPlaceholder { + klog.Warningf("Timeout trying to scale node group %s, enabling backoff for the group", nodeGroup.Id()) + clusterStateRegistry.RegisterFailedScaleUp(nodeGroup, metrics.Timeout, time.Now()) + } else { + klog.Warningf("Failed to remove node %s: %v", unregisteredNode.Node.Name, err) + logRecorder.Eventf(apiv1.EventTypeWarning, "DeleteUnregisteredFailed", + "Failed to remove node %s: %v", unregisteredNode.Node.Name, err) + return removedAny, err + } } logRecorder.Eventf(apiv1.EventTypeNormal, "DeleteUnregistered", "Removed unregistered node %v", unregisteredNode.Node.Name) diff --git a/cluster-autoscaler/core/utils_test.go b/cluster-autoscaler/core/utils_test.go index 181118c021d3..946198f9c82b 100644 --- a/cluster-autoscaler/core/utils_test.go +++ b/cluster-autoscaler/core/utils_test.go @@ -450,12 +450,12 @@ func TestRemoveOldUnregisteredNodes(t *testing.T) { assert.Equal(t, 1, len(unregisteredNodes)) // Nothing should be removed. The unregistered node is not old enough. - removed, err := removeOldUnregisteredNodes(unregisteredNodes, context, now.Add(-50*time.Minute), fakeLogRecorder) + removed, err := removeOldUnregisteredNodes(unregisteredNodes, context, clusterState, now.Add(-50*time.Minute), fakeLogRecorder) assert.NoError(t, err) assert.False(t, removed) // ng1_2 should be removed. - removed, err = removeOldUnregisteredNodes(unregisteredNodes, context, now, fakeLogRecorder) + removed, err = removeOldUnregisteredNodes(unregisteredNodes, context, clusterState, now, fakeLogRecorder) assert.NoError(t, err) assert.True(t, removed) deletedNode := getStringFromChan(deletedNodes)