Skip to content

Commit

Permalink
fix: identity for not yet created instances when scaling node groups
Browse files Browse the repository at this point in the history
  • Loading branch information
Lukasz Piatkowski committed May 17, 2019
1 parent 750001f commit 1cfffab
Show file tree
Hide file tree
Showing 9 changed files with 136 additions and 19 deletions.
79 changes: 68 additions & 11 deletions cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package aws
import (
"fmt"
"reflect"
"regexp"
"strings"
"sync"

Expand All @@ -30,7 +31,12 @@ import (
"k8s.io/klog"
)

const scaleToZeroSupported = true
const (
scaleToZeroSupported = true
placeholderInstanceNamePrefix = "i-placeholder-"
// TimeoutedPlaceholderName is used to mark placeholder instances that did not come up with timeout
TimeoutedPlaceholderName = "i-timeouted-placeholder"
)

type asgCache struct {
registeredAsgs []*asg
Expand Down Expand Up @@ -195,6 +201,10 @@ func (m *asgCache) SetAsgSize(asg *asg, size int) error {
m.mutex.Lock()
defer m.mutex.Unlock()

return m.setAsgSizeNoLock(asg, size)
}

func (m *asgCache) setAsgSizeNoLock(asg *asg, size int) error {
params := &autoscaling.SetDesiredCapacityInput{
AutoScalingGroupName: aws.String(asg.Name),
DesiredCapacity: aws.Int64(int64(size)),
Expand All @@ -212,6 +222,10 @@ func (m *asgCache) SetAsgSize(asg *asg, size int) error {
return nil
}

func (m *asgCache) decreaseAsgSizeByOneNoLock(asg *asg) error {
return m.setAsgSizeNoLock(asg, asg.curSize-1)
}

// DeleteInstances deletes the given instances. All instances must be controlled by the same ASG.
func (m *asgCache) DeleteInstances(instances []*AwsInstanceRef) error {
m.mutex.Lock()
Expand Down Expand Up @@ -239,21 +253,38 @@ func (m *asgCache) DeleteInstances(instances []*AwsInstanceRef) error {
}

for _, instance := range instances {
params := &autoscaling.TerminateInstanceInAutoScalingGroupInput{
InstanceId: aws.String(instance.Name),
ShouldDecrementDesiredCapacity: aws.Bool(true),
}
resp, err := m.service.TerminateInstanceInAutoScalingGroup(params)
if err != nil {
return err
// check if the instance is a placeholder - a requested instance that was never created by the node group
// if it is, just decrease the size of the node group, as there's no specific instance we can remove
matched, err := regexp.MatchString(fmt.Sprintf("^%s\\d+$", placeholderInstanceNamePrefix), instance.Name)
if err == nil && matched {
klog.V(4).Infof("instance %s is detected as a placeholder, decreasing ASG requested size instead "+
"of deleting instance", instance.Name)
m.decreaseAsgSizeByOneNoLock(commonAsg)
// mark this instance using its name as a timeouted placeholder
asg := m.instanceToAsg[*instance]
delete(m.instanceToAsg, *instance)
instance.Name = TimeoutedPlaceholderName
m.instanceToAsg[*instance] = asg
for i := range m.asgToInstances[commonAsg.AwsRef] {
if m.asgToInstances[commonAsg.AwsRef][i].ProviderID == instance.ProviderID {
m.asgToInstances[commonAsg.AwsRef][i].Name = TimeoutedPlaceholderName
}
}
} else {
params := &autoscaling.TerminateInstanceInAutoScalingGroupInput{
InstanceId: aws.String(instance.Name),
ShouldDecrementDesiredCapacity: aws.Bool(true),
}
resp, err := m.service.TerminateInstanceInAutoScalingGroup(params)
if err != nil {
return err
}
klog.V(4).Infof(*resp.Activity.Description)
}

// Proactively decrement the size so autoscaler makes better decisions
commonAsg.curSize--

klog.V(4).Infof(*resp.Activity.Description)
}

return nil
}

Expand Down Expand Up @@ -323,6 +354,11 @@ func (m *asgCache) regenerate() error {
return err
}

// If currently any ASG has more Desired than running Instances, introduce placeholders
// for the instances to come up. This is required to track Desired instances that
// will never come up, like with Spot Request that can't be fulfilled
groups = m.createPlaceholdersForDesiredNonStartedInstances(groups)

// Register or update ASGs
exists := make(map[AwsRef]bool)
for _, group := range groups {
Expand Down Expand Up @@ -355,6 +391,27 @@ func (m *asgCache) regenerate() error {
return nil
}

func (m *asgCache) createPlaceholdersForDesiredNonStartedInstances(groups []*autoscaling.Group) []*autoscaling.Group {
for _, g := range groups {
desired := *g.DesiredCapacity
real := int64(len(g.Instances))
if desired <= real {
continue
}

for i := real; i < desired; i++ {
id := fmt.Sprintf("%s%d", placeholderInstanceNamePrefix, i)
klog.V(4).Infof("Instance group %s has only %d instances created while requested count is %d."+
"Creating placeholder instance with ID %s", *g.AutoScalingGroupName, real, desired, id)
g.Instances = append(g.Instances, &autoscaling.Instance{
InstanceId: &id,
AvailabilityZone: g.AvailabilityZones[0],
})
}
}
return groups
}

func (m *asgCache) buildAsgFromAWS(g *autoscaling.Group) (*asg, error) {
spec := dynamic.NodeGroupSpec{
Name: aws.StringValue(g.AutoScalingGroupName),
Expand Down
11 changes: 11 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,17 @@ func (ng *AwsNodeGroup) Nodes() ([]cloudprovider.Instance, error) {

for i, asgNode := range asgNodes {
instances[i] = cloudprovider.Instance{Id: asgNode.ProviderID}
// check if the instance is a placeholder that failed to start within timeout
// if yes, set InstanceStatus accordingly
if asgNode.Name == TimeoutedPlaceholderName {
instances[i].Status = &cloudprovider.InstanceStatus{
State: cloudprovider.InstanceCreating,
ErrorInfo: &cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OutOfResourcesErrorClass,
ErrorMessage: "instance failed to start in the ASG within timeout",
},
}
}
}
return instances, nil
}
Expand Down
12 changes: 11 additions & 1 deletion cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,16 @@ func TestFetchExplicitAsgs(t *testing.T) {
mock.AnythingOfType("func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool"),
).Run(func(args mock.Arguments) {
fn := args.Get(1).(func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool)
zone := "test-1a"
fn(&autoscaling.DescribeAutoScalingGroupsOutput{
AutoScalingGroups: []*autoscaling.Group{
{AutoScalingGroupName: aws.String(groupname)},
{
AvailabilityZones: []*string{&zone},
AutoScalingGroupName: aws.String(groupname),
MinSize: aws.Int64(int64(min)),
MaxSize: aws.Int64(int64(max)),
DesiredCapacity: aws.Int64(int64(min)),
},
}}, false)
}).Return(nil)

Expand Down Expand Up @@ -381,11 +388,14 @@ func TestFetchAutoAsgs(t *testing.T) {
mock.AnythingOfType("func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool"),
).Run(func(args mock.Arguments) {
fn := args.Get(1).(func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool)
zone := "test-1a"
fn(&autoscaling.DescribeAutoScalingGroupsOutput{
AutoScalingGroups: []*autoscaling.Group{{
AvailabilityZones: []*string{&zone},
AutoScalingGroupName: aws.String(groupname),
MinSize: aws.Int64(int64(min)),
MaxSize: aws.Int64(int64(max)),
DesiredCapacity: aws.Int64(int64(min)),
}}}, false)
}).Return(nil).Twice()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import (
"reflect"
"strings"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/magnum/gophercloud"
yaml "gopkg.in/yaml.v2"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/magnum/gophercloud"
)

// Client is an interface that expects a Get method similar to http.Get. This
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (current LinkedPageBase) NextPageURL() (string, error) {
}

for {
key, path = path[0], path[1:len(path)]
key, path = path[0], path[1:]

value, ok := submap[key]
if !ok {
Expand Down
10 changes: 9 additions & 1 deletion cluster-autoscaler/clusterstate/clusterstate.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ import (

const (
// MaxNodeStartupTime is the maximum time from the moment the node is registered to the time the node is ready.
MaxNodeStartupTime = 15 * time.Minute
MaxNodeStartupTime = 1 * time.Minute

// MaxStatusSettingDelayAfterCreation is the maximum time for node to set its initial status after the
// node is registered.
Expand Down Expand Up @@ -248,6 +248,14 @@ func (csr *ClusterStateRegistry) updateScaleRequests(currentTime time.Time) {
csr.scaleDownRequests = newScaleDownRequests
}

// BackoffNodeGroup is used to force the specified nodeGroup to go into backoff mode, which
// means it won't be used for scaling out temporarily
func (csr *ClusterStateRegistry) BackoffNodeGroup(nodeGroup cloudprovider.NodeGroup, currentTime time.Time) {
csr.Lock()
defer csr.Unlock()
csr.backoffNodeGroup(nodeGroup, cloudprovider.OtherErrorClass, "cloudProviderError", currentTime)
}

// To be executed under a lock.
func (csr *ClusterStateRegistry) backoffNodeGroup(nodeGroup cloudprovider.NodeGroup, errorClass cloudprovider.InstanceErrorClass, errorCode string, currentTime time.Time) {
nodeGroupInfo := csr.nodeInfosForGroups[nodeGroup.Id()]
Expand Down
3 changes: 2 additions & 1 deletion cluster-autoscaler/core/static_autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
unregisteredNodes := a.clusterStateRegistry.GetUnregisteredNodes()
if len(unregisteredNodes) > 0 {
klog.V(1).Infof("%d unregistered nodes present", len(unregisteredNodes))
removedAny, err := removeOldUnregisteredNodes(unregisteredNodes, autoscalingContext, currentTime, autoscalingContext.LogRecorder)
removedAny, err := removeOldUnregisteredNodes(unregisteredNodes, autoscalingContext, a.clusterStateRegistry,
currentTime, autoscalingContext.LogRecorder)
// There was a problem with removing unregistered nodes. Retry in the next loop.
if err != nil {
if removedAny {
Expand Down
32 changes: 31 additions & 1 deletion cluster-autoscaler/core/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ func sanitizeTemplateNode(node *apiv1.Node, nodeGroup string) (*apiv1.Node, erro

// Removes unregistered nodes if needed. Returns true if anything was removed and error if such occurred.
func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNode, context *context.AutoscalingContext,
currentTime time.Time, logRecorder *utils.LogEventRecorder) (bool, error) {
clusterStateRegistry *clusterstate.ClusterStateRegistry, currentTime time.Time, logRecorder *utils.LogEventRecorder) (bool, error) {
removedAny := false
for _, unregisteredNode := range unregisteredNodes {
if unregisteredNode.UnregisteredSince.Add(context.MaxNodeProvisionTime).Before(currentTime) {
Expand Down Expand Up @@ -416,6 +416,20 @@ func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNod
"Failed to remove node %s: %v", unregisteredNode.Node.Name, err)
return removedAny, err
}

failedPlaceholder, err := anyPlaceholderInstanceStartupFailed(nodeGroup)
if err != nil {
return removedAny, err
}
if failedPlaceholder {
// this means only a placeholder instance was deleted - it is an instance, that was requested,
// but was not create before StartUpTimeout. It means something's wrong with this specific
// node group and we temporarily suspend requesting new instances from it by registering
// a failed scale up
klog.Warningf("Timeout trying to scale node group %s, enabling backoff for the group", nodeGroup.Id())
clusterStateRegistry.BackoffNodeGroup(nodeGroup, time.Now())
}

logRecorder.Eventf(apiv1.EventTypeNormal, "DeleteUnregistered",
"Removed unregistered node %v", unregisteredNode.Node.Name)
removedAny = true
Expand All @@ -424,6 +438,22 @@ func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNod
return removedAny, nil
}

// anyPlaceholderInstanceStartupFailed returns true, if any placeholder instance in the nodeGroup
// failed to start before timeout was triggered
func anyPlaceholderInstanceStartupFailed(nodeGroup cloudprovider.NodeGroup) (bool, error) {
nodes, err := nodeGroup.Nodes()
if err != nil {
return false, err
}
for _, node := range nodes {
if node.Status != nil && node.Status.State == cloudprovider.InstanceCreating &&
node.Status.ErrorInfo != nil && node.Status.ErrorInfo.ErrorClass == cloudprovider.OutOfResourcesErrorClass {
return true, nil
}
}
return false, nil
}

// Sets the target size of node groups to the current number of nodes in them
// if the difference was constant for a prolonged time. Returns true if managed
// to fix something.
Expand Down
4 changes: 2 additions & 2 deletions cluster-autoscaler/core/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -450,12 +450,12 @@ func TestRemoveOldUnregisteredNodes(t *testing.T) {
assert.Equal(t, 1, len(unregisteredNodes))

// Nothing should be removed. The unregistered node is not old enough.
removed, err := removeOldUnregisteredNodes(unregisteredNodes, context, now.Add(-50*time.Minute), fakeLogRecorder)
removed, err := removeOldUnregisteredNodes(unregisteredNodes, context, clusterState, now.Add(-50*time.Minute), fakeLogRecorder)
assert.NoError(t, err)
assert.False(t, removed)

// ng1_2 should be removed.
removed, err = removeOldUnregisteredNodes(unregisteredNodes, context, now, fakeLogRecorder)
removed, err = removeOldUnregisteredNodes(unregisteredNodes, context, clusterState, now, fakeLogRecorder)
assert.NoError(t, err)
assert.True(t, removed)
deletedNode := getStringFromChan(deletedNodes)
Expand Down

0 comments on commit 1cfffab

Please sign in to comment.