From 030a2152b018334edf6101397b458d92b9ee5d48 Mon Sep 17 00:00:00 2001 From: Benjamin Pineau Date: Wed, 19 May 2021 12:05:40 +0200 Subject: [PATCH] Fix templated nodeinfo names collisions in BinpackingNodeEstimator Both upscale's `getUpcomingNodeInfos` and the binpacking estimator now uses the same shared DeepCopyTemplateNode function and inherits its naming pattern, which is great as that fixes a long standing bug. Due to that, `getUpcomingNodeInfos` will enrich the cluster snapshots with generated nodeinfos and nodes having predictable names (using template name + an incremental ordinal starting at 0) for upcoming nodes. Later, when it looks for fitting nodes for unschedulable pods (when upcoming nodes don't satisfy those (FitsAnyNodeMatching failing due to nodes capacity, or pods antiaffinity, ...), the binpacking estimator will also build virtual nodes and place them in a snapshot fork to evaluate scheduler predicates. Those temporary virtual nodes are built using the same pattern (template name and an index ordinal also starting at 0) as the one previously used by `getUpcomingNodeInfos`, which means it will generate the same nodeinfos/nodes names for nodegroups having upcoming nodes. But adding nodes by the same name in an existing cluster snapshot isn't allowed, and the evaluation attempt will fail. Practically this blocks re-upscales for nodegroups having upcoming nodes, which can cause a significant delay. --- cluster-autoscaler/core/static_autoscaler.go | 2 +- cluster-autoscaler/estimator/binpacking_estimator.go | 3 ++- cluster-autoscaler/utils/scheduler/scheduler.go | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 83d840265a75..da91119b3d83 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -799,7 +799,7 @@ func getUpcomingNodeInfos(registry *clusterstate.ClusterStateRegistry, nodeInfos // Ensure new nodes have different names because nodeName // will be used as a map key. Also deep copy pods (daemonsets & // any pods added by cloud provider on template). - upcomingNodes = append(upcomingNodes, scheduler_utils.DeepCopyTemplateNode(nodeTemplate, i)) + upcomingNodes = append(upcomingNodes, scheduler_utils.DeepCopyTemplateNode(nodeTemplate, fmt.Sprintf("upcoming-%d", i))) } } return upcomingNodes diff --git a/cluster-autoscaler/estimator/binpacking_estimator.go b/cluster-autoscaler/estimator/binpacking_estimator.go index b998143cce70..87482f4921f1 100644 --- a/cluster-autoscaler/estimator/binpacking_estimator.go +++ b/cluster-autoscaler/estimator/binpacking_estimator.go @@ -17,6 +17,7 @@ limitations under the License. package estimator import ( + "fmt" "sort" apiv1 "k8s.io/api/core/v1" @@ -113,7 +114,7 @@ func (estimator *BinpackingNodeEstimator) addNewNodeToSnapshot( template *schedulerframework.NodeInfo, nameIndex int) (string, error) { - newNodeInfo := scheduler.DeepCopyTemplateNode(template, nameIndex) + newNodeInfo := scheduler.DeepCopyTemplateNode(template, fmt.Sprintf("estimator-%d", nameIndex)) var pods []*apiv1.Pod for _, podInfo := range newNodeInfo.Pods { pods = append(pods, podInfo.Pod) diff --git a/cluster-autoscaler/utils/scheduler/scheduler.go b/cluster-autoscaler/utils/scheduler/scheduler.go index f3e4439f6748..f710114cf1a1 100644 --- a/cluster-autoscaler/utils/scheduler/scheduler.go +++ b/cluster-autoscaler/utils/scheduler/scheduler.go @@ -64,9 +64,9 @@ func CreateNodeNameToInfoMap(pods []*apiv1.Pod, nodes []*apiv1.Node) map[string] // DeepCopyTemplateNode copies NodeInfo object used as a template. It changes // names of UIDs of both node and pods running on it, so that copies can be used // to represent multiple nodes. -func DeepCopyTemplateNode(nodeTemplate *schedulerframework.NodeInfo, index int) *schedulerframework.NodeInfo { +func DeepCopyTemplateNode(nodeTemplate *schedulerframework.NodeInfo, suffix string) *schedulerframework.NodeInfo { node := nodeTemplate.Node().DeepCopy() - node.Name = fmt.Sprintf("%s-%d", node.Name, index) + node.Name = fmt.Sprintf("%s-%s", node.Name, suffix) node.UID = uuid.NewUUID() if node.Labels == nil { node.Labels = make(map[string]string) @@ -76,7 +76,7 @@ func DeepCopyTemplateNode(nodeTemplate *schedulerframework.NodeInfo, index int) nodeInfo.SetNode(node) for _, podInfo := range nodeTemplate.Pods { pod := podInfo.Pod.DeepCopy() - pod.Name = fmt.Sprintf("%s-%d", podInfo.Pod.Name, index) + pod.Name = fmt.Sprintf("%s-%s", podInfo.Pod.Name, suffix) pod.UID = uuid.NewUUID() nodeInfo.AddPod(pod) }