From 4477707256f3fb011e39887f9d302f2c1f1733b4 Mon Sep 17 00:00:00 2001 From: Will Bowers <22203232+wllbo@users.noreply.github.com> Date: Tue, 13 Feb 2024 07:12:40 -0800 Subject: [PATCH] remove RemoveBackoff from updateScaleRequests --- .../clusterstate/clusterstate.go | 4 +--- .../clusterstate/clusterstate_test.go | 23 ++++++++++++++++--- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/cluster-autoscaler/clusterstate/clusterstate.go b/cluster-autoscaler/clusterstate/clusterstate.go index 2240d3f22883..1a28e1d37c50 100644 --- a/cluster-autoscaler/clusterstate/clusterstate.go +++ b/cluster-autoscaler/clusterstate/clusterstate.go @@ -261,10 +261,8 @@ func (csr *ClusterStateRegistry) updateScaleRequests(currentTime time.Time) { for nodeGroupName, scaleUpRequest := range csr.scaleUpRequests { if !csr.areThereUpcomingNodesInNodeGroup(nodeGroupName) { - // scale-out finished successfully - // remove it and reset node group backoff + // scale up finished successfully, remove request delete(csr.scaleUpRequests, nodeGroupName) - csr.backoff.RemoveBackoff(scaleUpRequest.NodeGroup, csr.nodeInfosForGroups[scaleUpRequest.NodeGroup.Id()]) klog.V(4).Infof("Scale up in group %v finished successfully in %v", nodeGroupName, currentTime.Sub(scaleUpRequest.Time)) continue diff --git a/cluster-autoscaler/clusterstate/clusterstate_test.go b/cluster-autoscaler/clusterstate/clusterstate_test.go index 52f2952d6807..7b4fb3d6c736 100644 --- a/cluster-autoscaler/clusterstate/clusterstate_test.go +++ b/cluster-autoscaler/clusterstate/clusterstate_test.go @@ -854,7 +854,7 @@ func TestScaleUpBackoff(t *testing.T) { }, }, clusterstate.NodeGroupScaleUpSafety(ng1, now)) - // The backoff should be cleared after a successful scale-up + // After successful scale-up, node group should still be backed off clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), 1, now) ng1_4 := BuildTestNode("ng1-4", 1000, 1000) SetNodeReadyState(ng1_4, true, now.Add(-1*time.Minute)) @@ -863,8 +863,25 @@ func TestScaleUpBackoff(t *testing.T) { assert.NoError(t, err) assert.True(t, clusterstate.IsClusterHealthy()) assert.True(t, clusterstate.IsNodeGroupHealthy("ng1")) - assert.Equal(t, NodeGroupScalingSafety{SafeToScale: true, Healthy: true}, clusterstate.NodeGroupScaleUpSafety(ng1, now)) - assert.Equal(t, backoff.Status{IsBackedOff: false}, clusterstate.backoff.BackoffStatus(ng1, nil, now)) + assert.Equal(t, NodeGroupScalingSafety{ + SafeToScale: false, + Healthy: true, + BackoffStatus: backoff.Status{ + IsBackedOff: true, + ErrorInfo: cloudprovider.InstanceErrorInfo{ + ErrorClass: cloudprovider.OtherErrorClass, + ErrorCode: "timeout", + ErrorMessage: "Scale-up timed out for node group ng1 after 2m1s", + }, + }, + }, clusterstate.NodeGroupScaleUpSafety(ng1, now)) + assert.Equal(t, backoff.Status{ + IsBackedOff: true, + ErrorInfo: cloudprovider.InstanceErrorInfo{ + ErrorClass: cloudprovider.OtherErrorClass, + ErrorCode: "timeout", + ErrorMessage: "Scale-up timed out for node group ng1 after 2m1s", + }}, clusterstate.backoff.BackoffStatus(ng1, nil, now)) } func TestGetClusterSize(t *testing.T) {