From ad98cb321a9989288512ac86cdcc5c0533239f21 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Sat, 30 Sep 2023 20:41:37 +0200 Subject: [PATCH] autoscaler: fix premature end of binpacking When PermissionToAddNode gets called without actually adding a new node, the node counter in the thresholdBasedEstimationLimiter gets out of sync with the actual number of new nodes. This can happen when the "is the last node empty" check triggers. The solution used here is to rearrange the checks so that PermissionToAddNode is followed by adding a new node. Alternatively, it might also be possible to pass the current number of nodes as parameter. --- .../estimator/binpacking_estimator.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/cluster-autoscaler/estimator/binpacking_estimator.go b/cluster-autoscaler/estimator/binpacking_estimator.go index 2f7760cbe0a0..86a26de22550 100644 --- a/cluster-autoscaler/estimator/binpacking_estimator.go +++ b/cluster-autoscaler/estimator/binpacking_estimator.go @@ -106,12 +106,6 @@ func (e *BinpackingNodeEstimator) Estimate( } if !found { - // Stop binpacking if we reach the limit of nodes we can add. - // We return the result of the binpacking that we already performed. - if !e.limiter.PermissionToAddNode() { - break - } - // If the last node we've added is empty and the pod couldn't schedule on it, it wouldn't be able to schedule // on a new node either. There is no point adding more nodes to snapshot in such case, especially because of // performance cost each extra node adds to future FitsAnyNodeMatching calls. @@ -119,6 +113,16 @@ func (e *BinpackingNodeEstimator) Estimate( continue } + // Stop binpacking if we reach the limit of nodes we can add. + // We return the result of the binpacking that we already performed. + // + // The thresholdBasedEstimationLimiter implementation assumes that for + // each call that returns true, one node gets added. Therefore this + // must be the last check right before really adding a node. + if !e.limiter.PermissionToAddNode() { + break + } + // Add new node newNodeName, err := e.addNewNodeToSnapshot(nodeTemplate, newNodeNameIndex) if err != nil {