From a7eea5e4d07fcb577a0244091fcd22505d13ae42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Mon, 30 Sep 2024 21:20:49 +0200 Subject: [PATCH] CA: refactor utils related to NodeInfos simulator.BuildNodeInfoForNode, core_utils.GetNodeInfoFromTemplate, and scheduler_utils.DeepCopyTemplateNode all had very similar logic for sanitizing and copying NodeInfos. They're all consolidated to one file in simulator, sharing common logic. DeepCopyNodeInfo is changed to be a framework.NodeInfo method. MixedTemplateNodeInfoProvider now correctly uses ClusterSnapshot to correlate Nodes to scheduled pods, instead of using a live Pod lister. This means that the snapshot now has to be properly initialized in a bunch of tests. --- .../scaleup/orchestrator/async_initializer.go | 4 +- .../core/scaleup/orchestrator/orchestrator.go | 6 +- .../scaleup/orchestrator/orchestrator_test.go | 29 +- .../core/scaleup/resource/manager_test.go | 9 + cluster-autoscaler/core/static_autoscaler.go | 3 +- .../core/static_autoscaler_test.go | 42 +- cluster-autoscaler/core/utils/utils.go | 77 --- cluster-autoscaler/core/utils/utils_test.go | 29 - .../estimator/binpacking_estimator.go | 4 +- .../mixed_nodeinfos_processor.go | 42 +- .../mixed_nodeinfos_processor_test.go | 33 +- .../simulator/framework/infos.go | 17 + .../simulator/framework/infos_test.go | 58 ++ .../simulator/node_info_utils.go | 153 ++++++ .../simulator/node_info_utils_test.go | 510 ++++++++++++++++++ cluster-autoscaler/simulator/nodes.go | 71 --- cluster-autoscaler/simulator/nodes_test.go | 239 -------- .../utils/daemonset/daemonset.go | 5 + .../utils/scheduler/scheduler.go | 22 - 19 files changed, 836 insertions(+), 517 deletions(-) create mode 100644 cluster-autoscaler/simulator/node_info_utils.go create mode 100644 cluster-autoscaler/simulator/node_info_utils_test.go delete mode 100644 cluster-autoscaler/simulator/nodes.go delete mode 100644 cluster-autoscaler/simulator/nodes_test.go diff --git a/cluster-autoscaler/core/scaleup/orchestrator/async_initializer.go b/cluster-autoscaler/core/scaleup/orchestrator/async_initializer.go index de2dabf600bc..a8e82b87e7ba 100644 --- a/cluster-autoscaler/core/scaleup/orchestrator/async_initializer.go +++ b/cluster-autoscaler/core/scaleup/orchestrator/async_initializer.go @@ -25,10 +25,10 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/context" - "k8s.io/autoscaler/cluster-autoscaler/core/utils" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/processors/status" + "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/autoscaler/cluster-autoscaler/utils/taints" @@ -110,7 +110,7 @@ func (s *AsyncNodeGroupInitializer) InitializeNodeGroup(result nodegroups.AsyncN mainCreatedNodeGroup := result.CreationResult.MainCreatedNodeGroup // If possible replace candidate node-info with node info based on crated node group. The latter // one should be more in line with nodes which will be created by node group. - nodeInfo, aErr := utils.GetNodeInfoFromTemplate(mainCreatedNodeGroup, s.daemonSets, s.taintConfig) + nodeInfo, aErr := simulator.TemplateNodeInfoFromNodeGroupTemplate(mainCreatedNodeGroup, s.daemonSets, s.taintConfig) if aErr != nil { klog.Warningf("Cannot build node info for newly created main node group %s. Using fallback. 
Error: %v", mainCreatedNodeGroup.Id(), aErr) nodeInfo = s.nodeInfo diff --git a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go index 8eb316c594b2..80020cf319e5 100644 --- a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go +++ b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go @@ -27,7 +27,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/context" "k8s.io/autoscaler/cluster-autoscaler/core/scaleup/equivalence" "k8s.io/autoscaler/cluster-autoscaler/core/scaleup/resource" - "k8s.io/autoscaler/cluster-autoscaler/core/utils" "k8s.io/autoscaler/cluster-autoscaler/estimator" "k8s.io/autoscaler/cluster-autoscaler/expander" "k8s.io/autoscaler/cluster-autoscaler/metrics" @@ -35,6 +34,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/processors/status" + "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/autoscaler/cluster-autoscaler/utils/klogx" @@ -527,7 +527,7 @@ func (o *ScaleUpOrchestrator) CreateNodeGroup( // If possible replace candidate node-info with node info based on crated node group. The latter // one should be more in line with nodes which will be created by node group. - mainCreatedNodeInfo, aErr := utils.GetNodeInfoFromTemplate(createNodeGroupResult.MainCreatedNodeGroup, daemonSets, o.taintConfig) + mainCreatedNodeInfo, aErr := simulator.TemplateNodeInfoFromNodeGroupTemplate(createNodeGroupResult.MainCreatedNodeGroup, daemonSets, o.taintConfig) if aErr == nil { nodeInfos[createNodeGroupResult.MainCreatedNodeGroup.Id()] = mainCreatedNodeInfo schedulablePodGroups[createNodeGroupResult.MainCreatedNodeGroup.Id()] = o.SchedulablePodGroups(podEquivalenceGroups, createNodeGroupResult.MainCreatedNodeGroup, mainCreatedNodeInfo) @@ -542,7 +542,7 @@ func (o *ScaleUpOrchestrator) CreateNodeGroup( delete(schedulablePodGroups, oldId) } for _, nodeGroup := range createNodeGroupResult.ExtraCreatedNodeGroups { - nodeInfo, aErr := utils.GetNodeInfoFromTemplate(nodeGroup, daemonSets, o.taintConfig) + nodeInfo, aErr := simulator.TemplateNodeInfoFromNodeGroupTemplate(nodeGroup, daemonSets, o.taintConfig) if aErr != nil { klog.Warningf("Cannot build node info for newly created extra node group %v; balancing similar node groups will not work; err=%v", nodeGroup.Id(), aErr) continue diff --git a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go index a1cdd15eba91..bd270be6774a 100644 --- a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go +++ b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go @@ -1049,6 +1049,8 @@ func runSimpleScaleUpTest(t *testing.T, config *ScaleUpTestConfig) *ScaleUpTestR // build orchestrator context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil) assert.NoError(t, err) + err = context.ClusterSnapshot.SetClusterState(nodes, kube_util.ScheduledPods(pods)) + assert.NoError(t, err) nodeInfos, err := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false). 
Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) assert.NoError(t, err) @@ -1130,13 +1132,15 @@ func TestScaleUpUnhealthy(t *testing.T) { SetNodeReadyState(n1, true, someTimeAgo) n2 := BuildTestNode("n2", 1000, 1000) SetNodeReadyState(n2, true, someTimeAgo) + nodes := []*apiv1.Node{n1, n2} p1 := BuildTestPod("p1", 80, 0) p2 := BuildTestPod("p2", 800, 0) p1.Spec.NodeName = "n1" p2.Spec.NodeName = "n2" + pods := []*apiv1.Pod{p1, p2} - podLister := kube_util.NewTestPodLister([]*apiv1.Pod{p1, p2}) + podLister := kube_util.NewTestPodLister(pods) listers := kube_util.NewListerRegistry(nil, nil, podLister, nil, nil, nil, nil, nil, nil) provider := testprovider.NewTestCloudProvider(func(nodeGroup string, increase int) error { @@ -1155,8 +1159,8 @@ func TestScaleUpUnhealthy(t *testing.T) { } context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil) assert.NoError(t, err) - - nodes := []*apiv1.Node{n1, n2} + err = context.ClusterSnapshot.SetClusterState(nodes, pods) + assert.NoError(t, err) nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) clusterState.UpdateNodes(nodes, nodeInfos, time.Now()) @@ -1198,7 +1202,8 @@ func TestBinpackingLimiter(t *testing.T) { context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil) assert.NoError(t, err) - + err = context.ClusterSnapshot.SetClusterState(nodes, nil) + assert.NoError(t, err) nodeInfos, err := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false). 
Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) assert.NoError(t, err) @@ -1233,11 +1238,13 @@ func TestScaleUpNoHelp(t *testing.T) { n1 := BuildTestNode("n1", 100, 1000) now := time.Now() SetNodeReadyState(n1, true, now.Add(-2*time.Minute)) + nodes := []*apiv1.Node{n1} p1 := BuildTestPod("p1", 80, 0) p1.Spec.NodeName = "n1" + pods := []*apiv1.Pod{p1} - podLister := kube_util.NewTestPodLister([]*apiv1.Pod{p1}) + podLister := kube_util.NewTestPodLister(pods) listers := kube_util.NewListerRegistry(nil, nil, podLister, nil, nil, nil, nil, nil, nil) provider := testprovider.NewTestCloudProvider(func(nodeGroup string, increase int) error { @@ -1255,8 +1262,8 @@ func TestScaleUpNoHelp(t *testing.T) { } context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil) assert.NoError(t, err) - - nodes := []*apiv1.Node{n1} + err = context.ClusterSnapshot.SetClusterState(nodes, pods) + assert.NoError(t, err) nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) clusterState.UpdateNodes(nodes, nodeInfos, time.Now()) @@ -1410,7 +1417,8 @@ func TestComputeSimilarNodeGroups(t *testing.T) { listers := kube_util.NewListerRegistry(nil, nil, kube_util.NewTestPodLister(nil), nil, nil, nil, nil, nil, nil) ctx, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{BalanceSimilarNodeGroups: tc.balancingEnabled}, &fake.Clientset{}, listers, provider, nil, nil) assert.NoError(t, err) - + err = ctx.ClusterSnapshot.SetClusterState(nodes, nil) + assert.NoError(t, err) nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, ctx.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) assert.NoError(t, clusterState.UpdateNodes(nodes, nodeInfos, time.Now())) @@ -1474,7 +1482,8 @@ func TestScaleUpBalanceGroups(t *testing.T) { } context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil) assert.NoError(t, err) - + err = context.ClusterSnapshot.SetClusterState(nodes, podList) + assert.NoError(t, err) nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) clusterState.UpdateNodes(nodes, nodeInfos, time.Now()) @@ -1650,6 +1659,8 @@ func TestScaleUpToMeetNodeGroupMinSize(t *testing.T) { assert.NoError(t, err) nodes := []*apiv1.Node{n1, n2} + err = context.ClusterSnapshot.SetClusterState(nodes, nil) + assert.NoError(t, err) 
nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now()) processors := processorstest.NewTestProcessors(&context) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) diff --git a/cluster-autoscaler/core/scaleup/resource/manager_test.go b/cluster-autoscaler/core/scaleup/resource/manager_test.go index ac1204be1b55..3824edb4dcbc 100644 --- a/cluster-autoscaler/core/scaleup/resource/manager_test.go +++ b/cluster-autoscaler/core/scaleup/resource/manager_test.go @@ -22,6 +22,7 @@ import ( "time" "github.com/stretchr/testify/assert" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" @@ -73,6 +74,8 @@ func TestDeltaForNode(t *testing.T) { ng := testCase.nodeGroupConfig group, nodes := newNodeGroup(t, cp, ng.Name, ng.Min, ng.Max, ng.Size, ng.CPU, ng.Mem) + err := ctx.ClusterSnapshot.SetClusterState(nodes, nil) + assert.NoError(t, err) nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now()) rm := NewManager(processors.CustomResourcesProcessor) @@ -114,6 +117,8 @@ func TestResourcesLeft(t *testing.T) { ng := testCase.nodeGroupConfig _, nodes := newNodeGroup(t, cp, ng.Name, ng.Min, ng.Max, ng.Size, ng.CPU, ng.Mem) + err := ctx.ClusterSnapshot.SetClusterState(nodes, nil) + assert.NoError(t, err) nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now()) rm := NewManager(processors.CustomResourcesProcessor) @@ -165,6 +170,8 @@ func TestApplyLimits(t *testing.T) { ng := testCase.nodeGroupConfig group, nodes := newNodeGroup(t, cp, ng.Name, ng.Min, ng.Max, ng.Size, ng.CPU, ng.Mem) + err := ctx.ClusterSnapshot.SetClusterState(nodes, nil) + assert.NoError(t, err) nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now()) rm := NewManager(processors.CustomResourcesProcessor) @@ -230,6 +237,8 @@ func TestResourceManagerWithGpuResource(t *testing.T) { assert.NoError(t, err) nodes := []*corev1.Node{n1} + err = context.ClusterSnapshot.SetClusterState(nodes, nil) + assert.NoError(t, err) nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now()) rm := NewManager(processors.CustomResourcesProcessor) diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 6902b13df35c..280a1f589a28 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -52,7 +52,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/utils/backoff" caerrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" - scheduler_utils "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler" "k8s.io/autoscaler/cluster-autoscaler/utils/taints" "k8s.io/utils/integer" @@ -1028,7 +1027,7 @@ func getUpcomingNodeInfos(upcomingCounts map[string]int, nodeInfos 
map[string]*f // Ensure new nodes have different names because nodeName // will be used as a map key. Also deep copy pods (daemonsets & // any pods added by cloud provider on template). - nodes = append(nodes, scheduler_utils.DeepCopyTemplateNode(nodeTemplate, fmt.Sprintf("upcoming-%d", i))) + nodes = append(nodes, simulator.FreshNodeInfoFromTemplateNodeInfo(nodeTemplate, fmt.Sprintf("upcoming-%d", i))) } upcomingNodes[nodeGroup] = nodes } diff --git a/cluster-autoscaler/core/static_autoscaler_test.go b/cluster-autoscaler/core/static_autoscaler_test.go index 2df10d4b7355..00ff2a8c6338 100644 --- a/cluster-autoscaler/core/static_autoscaler_test.go +++ b/cluster-autoscaler/core/static_autoscaler_test.go @@ -64,6 +64,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/utils/taints" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" kube_record "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" appsv1 "k8s.io/api/apps/v1" @@ -78,7 +79,6 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" - klog "k8s.io/klog/v2" ) type podListerMock struct { @@ -406,7 +406,7 @@ func TestStaticAutoscalerRunOnce(t *testing.T) { // MaxNodesTotal reached. readyNodeLister.SetNodes([]*apiv1.Node{n1}) allNodeLister.SetNodes([]*apiv1.Node{n1}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() @@ -417,7 +417,7 @@ func TestStaticAutoscalerRunOnce(t *testing.T) { // Scale up. readyNodeLister.SetNodes([]*apiv1.Node{n1}) allNodeLister.SetNodes([]*apiv1.Node{n1}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() onScaleUpMock.On("ScaleUp", "ng1", 1).Return(nil).Once() @@ -431,7 +431,7 @@ func TestStaticAutoscalerRunOnce(t *testing.T) { // Mark unneeded nodes. readyNodeLister.SetNodes([]*apiv1.Node{n1, n2}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() @@ -446,7 +446,7 @@ func TestStaticAutoscalerRunOnce(t *testing.T) { // Scale down. readyNodeLister.SetNodes([]*apiv1.Node{n1, n2}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Times(3) + allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Twice() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() onScaleDownMock.On("ScaleDown", "ng1", "n2").Return(nil).Once() @@ -460,7 +460,7 @@ func TestStaticAutoscalerRunOnce(t *testing.T) { // Mark unregistered nodes. 
readyNodeLister.SetNodes([]*apiv1.Node{n1, n2}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() @@ -475,7 +475,7 @@ func TestStaticAutoscalerRunOnce(t *testing.T) { // Remove unregistered nodes. readyNodeLister.SetNodes([]*apiv1.Node{n1, n2}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() onScaleDownMock.On("ScaleDown", "ng2", "n3").Return(nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() @@ -489,7 +489,7 @@ func TestStaticAutoscalerRunOnce(t *testing.T) { // Scale up to node group min size. readyNodeLister.SetNodes([]*apiv1.Node{n4}) allNodeLister.SetNodes([]*apiv1.Node{n4}) - allPodListerMock.On("List").Return([]*apiv1.Pod{}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil) podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil) onScaleUpMock.On("ScaleUp", "ng3", 2).Return(nil).Once() // 2 new nodes are supposed to be scaled up. @@ -689,7 +689,7 @@ func TestStaticAutoscalerRunOnceWithScaleDownDelayPerNG(t *testing.T) { // Mark unneeded nodes. readyNodeLister.SetNodes([]*apiv1.Node{n1, n2}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() @@ -701,7 +701,7 @@ func TestStaticAutoscalerRunOnceWithScaleDownDelayPerNG(t *testing.T) { // Scale down nodegroup readyNodeLister.SetNodes([]*apiv1.Node{n1, n2}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Times(3) + allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Twice() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil) onScaleDownMock.On("ScaleDown", tc.expectedScaleDownNG, tc.expectedScaleDownNode).Return(nil).Once() @@ -828,7 +828,7 @@ func TestStaticAutoscalerRunOnceWithAutoprovisionedEnabled(t *testing.T) { // Scale up. readyNodeLister.SetNodes([]*apiv1.Node{n1}) allNodeLister.SetNodes([]*apiv1.Node{n1}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() onNodeGroupCreateMock.On("Create", "autoprovisioned-TN2").Return(nil).Once() @@ -845,7 +845,7 @@ func TestStaticAutoscalerRunOnceWithAutoprovisionedEnabled(t *testing.T) { // Remove autoprovisioned node group and mark unneeded nodes. 
readyNodeLister.SetNodes([]*apiv1.Node{n1, n2}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() onNodeGroupDeleteMock.On("Delete", "autoprovisioned-TN1").Return(nil).Once() @@ -861,7 +861,7 @@ func TestStaticAutoscalerRunOnceWithAutoprovisionedEnabled(t *testing.T) { // Scale down. readyNodeLister.SetNodes([]*apiv1.Node{n1, n2}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Times(3) + allPodListerMock.On("List").Return([]*apiv1.Pod{p1}, nil).Twice() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() onNodeGroupDeleteMock.On("Delete", "autoprovisioned-"+ @@ -984,7 +984,7 @@ func TestStaticAutoscalerRunOnceWithALongUnregisteredNode(t *testing.T) { // Scale up. readyNodeLister.SetNodes(nodes) allNodeLister.SetNodes(nodes) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() onScaleUpMock.On("ScaleUp", "ng1", 1).Return(nil).Once() @@ -1002,7 +1002,7 @@ func TestStaticAutoscalerRunOnceWithALongUnregisteredNode(t *testing.T) { // Remove broken node readyNodeLister.SetNodes(nodes) allNodeLister.SetNodes(nodes) - allPodListerMock.On("List").Return([]*apiv1.Pod{}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{}, nil).Once() onScaleDownMock.On("ScaleDown", "ng1", "broken").Return(nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() @@ -1137,7 +1137,7 @@ func TestStaticAutoscalerRunOncePodsWithPriorities(t *testing.T) { // Scale up readyNodeLister.SetNodes([]*apiv1.Node{n1, n2, n3}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2, n3}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2, p3, p4, p5, p6}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2, p3, p4, p5, p6}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() onScaleUpMock.On("ScaleUp", "ng2", 1).Return(nil).Once() @@ -1150,7 +1150,7 @@ func TestStaticAutoscalerRunOncePodsWithPriorities(t *testing.T) { // Mark unneeded nodes. readyNodeLister.SetNodes([]*apiv1.Node{n1, n2, n3}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2, n3}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2, p3, p4, p5}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2, p3, p4, p5}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() @@ -1164,7 +1164,7 @@ func TestStaticAutoscalerRunOncePodsWithPriorities(t *testing.T) { // Scale down. 
readyNodeLister.SetNodes([]*apiv1.Node{n1, n2, n3}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2, n3}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2, p3, p4, p5}, nil).Times(3) + allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2, p3, p4, p5}, nil).Twice() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() onScaleDownMock.On("ScaleDown", "ng1", "n1").Return(nil).Once() @@ -1266,7 +1266,7 @@ func TestStaticAutoscalerRunOnceWithFilteringOnBinPackingEstimator(t *testing.T) // Scale up readyNodeLister.SetNodes([]*apiv1.Node{n1, n2}) allNodeLister.SetNodes([]*apiv1.Node{n1, n2}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p3, p4}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p3, p4}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() @@ -1365,7 +1365,7 @@ func TestStaticAutoscalerRunOnceWithFilteringOnUpcomingNodesEnabledNoScaleUp(t * // Scale up readyNodeLister.SetNodes([]*apiv1.Node{n2, n3}) allNodeLister.SetNodes([]*apiv1.Node{n2, n3}) - allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2, p3}, nil).Twice() + allPodListerMock.On("List").Return([]*apiv1.Pod{p1, p2, p3}, nil).Once() daemonSetListerMock.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() podDisruptionBudgetListerMock.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() @@ -1566,7 +1566,7 @@ func TestStaticAutoscalerRunOnceWithBypassedSchedulers(t *testing.T) { tc.setupConfig.mocks.readyNodeLister.SetNodes([]*apiv1.Node{n1}) tc.setupConfig.mocks.allNodeLister.SetNodes([]*apiv1.Node{n1}) - tc.setupConfig.mocks.allPodLister.On("List").Return(tc.pods, nil).Twice() + tc.setupConfig.mocks.allPodLister.On("List").Return(tc.pods, nil).Once() tc.setupConfig.mocks.daemonSetLister.On("List", labels.Everything()).Return([]*appsv1.DaemonSet{}, nil).Once() tc.setupConfig.mocks.podDisruptionBudgetLister.On("List").Return([]*policyv1.PodDisruptionBudget{}, nil).Once() if tc.expectedScaleUp != nil { diff --git a/cluster-autoscaler/core/utils/utils.go b/cluster-autoscaler/core/utils/utils.go index c25db2ef8453..1b493b783cfc 100644 --- a/cluster-autoscaler/core/utils/utils.go +++ b/cluster-autoscaler/core/utils/utils.go @@ -17,52 +17,17 @@ limitations under the License. package utils import ( - "fmt" - "math/rand" "reflect" "time" - appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/clusterstate" "k8s.io/autoscaler/cluster-autoscaler/metrics" - "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/utils/daemonset" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" - "k8s.io/autoscaler/cluster-autoscaler/utils/labels" - "k8s.io/autoscaler/cluster-autoscaler/utils/taints" ) -// GetNodeInfoFromTemplate returns NodeInfo object built base on TemplateNodeInfo returned by NodeGroup.TemplateNodeInfo(). 
-func GetNodeInfoFromTemplate(nodeGroup cloudprovider.NodeGroup, daemonsets []*appsv1.DaemonSet, taintConfig taints.TaintConfig) (*framework.NodeInfo, errors.AutoscalerError) { - id := nodeGroup.Id() - baseNodeInfo, err := nodeGroup.TemplateNodeInfo() - if err != nil { - return nil, errors.ToAutoscalerError(errors.CloudProviderError, err) - } - - labels.UpdateDeprecatedLabels(baseNodeInfo.Node().ObjectMeta.Labels) - - sanitizedNode, typedErr := SanitizeNode(baseNodeInfo.Node(), id, taintConfig) - if err != nil { - return nil, typedErr - } - baseNodeInfo.SetNode(sanitizedNode) - - pods, err := daemonset.GetDaemonSetPodsForNode(baseNodeInfo, daemonsets) - if err != nil { - return nil, errors.ToAutoscalerError(errors.InternalError, err) - } - for _, podInfo := range baseNodeInfo.Pods() { - pods = append(pods, &framework.PodInfo{Pod: podInfo.Pod}) - } - - sanitizedNodeInfo := framework.NewNodeInfo(sanitizedNode, nil, SanitizePods(pods, sanitizedNode)...) - return sanitizedNodeInfo, nil -} - // isVirtualNode determines if the node is created by virtual kubelet func isVirtualNode(node *apiv1.Node) bool { return node.ObjectMeta.Labels["type"] == "virtual-kubelet" @@ -89,48 +54,6 @@ func FilterOutNodesFromNotAutoscaledGroups(nodes []*apiv1.Node, cloudProvider cl return result, nil } -// DeepCopyNodeInfo clones the provided nodeInfo -func DeepCopyNodeInfo(nodeInfo *framework.NodeInfo) *framework.NodeInfo { - newPods := make([]*framework.PodInfo, 0) - for _, podInfo := range nodeInfo.Pods() { - newPods = append(newPods, &framework.PodInfo{Pod: podInfo.Pod.DeepCopy()}) - } - - // Build a new node info. - newNodeInfo := framework.NewNodeInfo(nodeInfo.Node().DeepCopy(), nil, newPods...) - return newNodeInfo -} - -// SanitizeNode cleans up nodes used for node group templates -func SanitizeNode(node *apiv1.Node, nodeGroup string, taintConfig taints.TaintConfig) (*apiv1.Node, errors.AutoscalerError) { - newNode := node.DeepCopy() - nodeName := fmt.Sprintf("template-node-for-%s-%d", nodeGroup, rand.Int63()) - newNode.Labels = make(map[string]string, len(node.Labels)) - for k, v := range node.Labels { - if k != apiv1.LabelHostname { - newNode.Labels[k] = v - } else { - newNode.Labels[k] = nodeName - } - } - newNode.Name = nodeName - newNode.Spec.Taints = taints.SanitizeTaints(newNode.Spec.Taints, taintConfig) - return newNode, nil -} - -// SanitizePods cleans up pods used for node group templates -func SanitizePods(pods []*framework.PodInfo, sanitizedNode *apiv1.Node) []*framework.PodInfo { - // Update node name in pods. - sanitizedPods := make([]*framework.PodInfo, 0) - for _, pod := range pods { - sanitizedPod := pod.Pod.DeepCopy() - sanitizedPod.Spec.NodeName = sanitizedNode.Name - sanitizedPods = append(sanitizedPods, &framework.PodInfo{Pod: sanitizedPod}) - } - - return sanitizedPods -} - func hasHardInterPodAffinity(affinity *apiv1.Affinity) bool { if affinity == nil { return false diff --git a/cluster-autoscaler/core/utils/utils_test.go b/cluster-autoscaler/core/utils/utils_test.go index b63badbcc834..2613b0419a13 100644 --- a/cluster-autoscaler/core/utils/utils_test.go +++ b/cluster-autoscaler/core/utils/utils_test.go @@ -20,8 +20,6 @@ import ( "testing" "time" - "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/utils/taints" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" "github.com/stretchr/testify/assert" @@ -29,33 +27,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func TestSanitizePods(t *testing.T) { - pod := BuildTestPod("p1", 80, 0) - pod.Spec.NodeName = "n1" - pods := []*framework.PodInfo{{Pod: pod}} - - node := BuildTestNode("node", 1000, 1000) - - resNode, err := SanitizeNode(node, "test-group", taints.TaintConfig{}) - assert.NoError(t, err) - res := SanitizePods(pods, resNode) - assert.Equal(t, 1, len(res)) -} - -func TestSanitizeLabels(t *testing.T) { - oldNode := BuildTestNode("ng1-1", 1000, 1000) - oldNode.Labels = map[string]string{ - apiv1.LabelHostname: "abc", - "x": "y", - } - node, err := SanitizeNode(oldNode, "bzium", taints.TaintConfig{}) - assert.NoError(t, err) - assert.NotEqual(t, node.Labels[apiv1.LabelHostname], "abc", nil) - assert.Equal(t, node.Labels["x"], "y") - assert.NotEqual(t, node.Name, oldNode.Name) - assert.Equal(t, node.Labels[apiv1.LabelHostname], node.Name) -} - func TestGetNodeResource(t *testing.T) { node := BuildTestNode("n1", 1000, 2*MiB) diff --git a/cluster-autoscaler/estimator/binpacking_estimator.go b/cluster-autoscaler/estimator/binpacking_estimator.go index 55e1de431997..1f602348df50 100644 --- a/cluster-autoscaler/estimator/binpacking_estimator.go +++ b/cluster-autoscaler/estimator/binpacking_estimator.go @@ -21,10 +21,10 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + core_utils "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" - "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler" "k8s.io/klog/v2" ) @@ -210,7 +210,7 @@ func (e *BinpackingNodeEstimator) addNewNodeToSnapshot( estimationState *estimationState, template *framework.NodeInfo, ) error { - newNodeInfo := scheduler.DeepCopyTemplateNode(template, fmt.Sprintf("e-%d", estimationState.newNodeNameIndex)) + newNodeInfo := core_utils.FreshNodeInfoFromTemplateNodeInfo(template, fmt.Sprintf("e-%d", estimationState.newNodeNameIndex)) if err := e.clusterSnapshot.AddNodeInfo(newNodeInfo); err != nil { return err } diff --git a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go index 8b0ebd58571a..34f486392099 100644 --- a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go +++ b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go @@ -24,7 +24,6 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/context" - "k8s.io/autoscaler/cluster-autoscaler/core/utils" "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" @@ -78,11 +77,6 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext, result := make(map[string]*framework.NodeInfo) seenGroups := make(map[string]bool) - podsForNodes, err := getPodsForNodes(ctx.ListerRegistry) - if err != nil { - return map[string]*framework.NodeInfo{}, err - } - // processNode returns information whether the nodeTemplate was generated and if there was an error. 
processNode := func(node *apiv1.Node) (bool, string, errors.AutoscalerError) { nodeGroup, err := ctx.CloudProvider.NodeGroupForNode(node) @@ -94,22 +88,15 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext, } id := nodeGroup.Id() if _, found := result[id]; !found { - // Build nodeInfo. - sanitizedNode, err := utils.SanitizeNode(node, id, taintConfig) + nodeInfo, err := ctx.ClusterSnapshot.GetNodeInfo(node.Name) if err != nil { - return false, "", err + return false, "", errors.NewAutoscalerError(errors.InternalError, "error while retrieving node %s from cluster snapshot - this shouldn't happen: %v", node.Name, err) } - nodeInfo, err := simulator.BuildNodeInfoForNode(sanitizedNode, podsForNodes[node.Name], daemonsets, p.forceDaemonSets) + templateNodeInfo, caErr := simulator.TemplateNodeInfoFromExampleNodeInfo(nodeInfo, id, daemonsets, p.forceDaemonSets, taintConfig) if err != nil { - return false, "", err - } - - var pods []*apiv1.Pod - for _, podInfo := range nodeInfo.Pods() { - pods = append(pods, podInfo.Pod) + return false, "", caErr } - sanitizedNodeInfo := framework.NewNodeInfo(sanitizedNode, nil, utils.SanitizePods(nodeInfo.Pods(), sanitizedNode)...) - result[id] = sanitizedNodeInfo + result[id] = templateNodeInfo return true, id, nil } return false, "", nil @@ -125,7 +112,7 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext, return map[string]*framework.NodeInfo{}, typedErr } if added && p.nodeInfoCache != nil { - nodeInfoCopy := utils.DeepCopyNodeInfo(result[id]) + nodeInfoCopy := result[id].DeepCopy() p.nodeInfoCache[id] = cacheItem{NodeInfo: nodeInfoCopy, added: time.Now()} } } @@ -142,7 +129,7 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext, if p.isCacheItemExpired(cacheItem.added) { delete(p.nodeInfoCache, id) } else { - result[id] = utils.DeepCopyNodeInfo(cacheItem.NodeInfo) + result[id] = cacheItem.NodeInfo.DeepCopy() continue } } @@ -150,7 +137,7 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext, // No good template, trying to generate one. This is called only if there are no // working nodes in the node groups. By default CA tries to use a real-world example. 
- nodeInfo, err := utils.GetNodeInfoFromTemplate(nodeGroup, daemonsets, taintConfig) + nodeInfo, err := simulator.TemplateNodeInfoFromNodeGroupTemplate(nodeGroup, daemonsets, taintConfig) if err != nil { if err == cloudprovider.ErrNotImplemented { continue @@ -192,19 +179,6 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext, return result, nil } -func getPodsForNodes(listers kube_util.ListerRegistry) (map[string][]*apiv1.Pod, errors.AutoscalerError) { - pods, err := listers.AllPodLister().List() - if err != nil { - return nil, errors.ToAutoscalerError(errors.ApiCallError, err) - } - scheduledPods := kube_util.ScheduledPods(pods) - podsForNodes := map[string][]*apiv1.Pod{} - for _, p := range scheduledPods { - podsForNodes[p.Spec.NodeName] = append(podsForNodes[p.Spec.NodeName], p) - } - return podsForNodes, nil -} - func isNodeGoodTemplateCandidate(node *apiv1.Node, now time.Time) bool { ready, lastTransitionTime, _ := kube_util.GetReadinessState(node) stable := lastTransitionTime.Add(stabilizationDelay).Before(now) diff --git a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go index 68e04752a8dc..cd3eccc390af 100644 --- a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go +++ b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go @@ -22,6 +22,7 @@ import ( testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" "k8s.io/autoscaler/cluster-autoscaler/context" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" @@ -30,6 +31,7 @@ import ( schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" "github.com/stretchr/testify/assert" + appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" ) @@ -81,14 +83,20 @@ func TestGetNodeInfosForGroups(t *testing.T) { predicateChecker, err := predicatechecker.NewTestPredicateChecker() assert.NoError(t, err) + nodes := []*apiv1.Node{justReady5, unready4, unready3, ready2, ready1} + snapshot := clustersnapshot.NewBasicClusterSnapshot() + err = snapshot.SetClusterState(nodes, nil) + assert.NoError(t, err) + ctx := context.AutoscalingContext{ CloudProvider: provider1, + ClusterSnapshot: snapshot, PredicateChecker: predicateChecker, AutoscalingKubeClients: context.AutoscalingKubeClients{ ListerRegistry: registry, }, } - res, err := NewMixedTemplateNodeInfoProvider(&cacheTtl, false).Process(&ctx, []*apiv1.Node{justReady5, unready4, unready3, ready2, ready1}, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) + res, err := NewMixedTemplateNodeInfoProvider(&cacheTtl, false).Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) assert.NoError(t, err) assert.Equal(t, 5, len(res)) info, found := res["ng1"] @@ -110,6 +118,7 @@ func TestGetNodeInfosForGroups(t *testing.T) { // Test for a nodegroup without nodes and TemplateNodeInfo not implemented by cloud proivder ctx = context.AutoscalingContext{ CloudProvider: provider2, + ClusterSnapshot: clustersnapshot.NewBasicClusterSnapshot(), PredicateChecker: predicateChecker, AutoscalingKubeClients: context.AutoscalingKubeClients{ ListerRegistry: registry, @@ -165,16 +174,22 @@ func TestGetNodeInfosForGroupsCache(t *testing.T) { predicateChecker, err := 
predicatechecker.NewTestPredicateChecker() assert.NoError(t, err) + nodes := []*apiv1.Node{unready4, unready3, ready2, ready1} + snapshot := clustersnapshot.NewBasicClusterSnapshot() + err = snapshot.SetClusterState(nodes, nil) + assert.NoError(t, err) + // Fill cache ctx := context.AutoscalingContext{ CloudProvider: provider1, + ClusterSnapshot: snapshot, PredicateChecker: predicateChecker, AutoscalingKubeClients: context.AutoscalingKubeClients{ ListerRegistry: registry, }, } niProcessor := NewMixedTemplateNodeInfoProvider(&cacheTtl, false) - res, err := niProcessor.Process(&ctx, []*apiv1.Node{unready4, unready3, ready2, ready1}, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) + res, err := niProcessor.Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) assert.NoError(t, err) // Check results assert.Equal(t, 4, len(res)) @@ -208,7 +223,7 @@ func TestGetNodeInfosForGroupsCache(t *testing.T) { assert.Equal(t, "ng3", lastDeletedGroup) // Check cache with all nodes removed - res, err = niProcessor.Process(&ctx, []*apiv1.Node{unready4, unready3, ready2, ready1}, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) + res, err = niProcessor.Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) assert.NoError(t, err) // Check results assert.Equal(t, 2, len(res)) @@ -229,7 +244,7 @@ func TestGetNodeInfosForGroupsCache(t *testing.T) { // Fill cache manually infoNg4Node6 := framework.NewTestNodeInfo(ready6.DeepCopy()) niProcessor.nodeInfoCache = map[string]cacheItem{"ng4": {NodeInfo: infoNg4Node6, added: now}} - res, err = niProcessor.Process(&ctx, []*apiv1.Node{unready4, unready3, ready2, ready1}, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) + res, err = niProcessor.Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) // Check if cache was used assert.NoError(t, err) assert.Equal(t, 2, len(res)) @@ -253,8 +268,14 @@ func TestGetNodeInfosCacheExpired(t *testing.T) { predicateChecker, err := predicatechecker.NewTestPredicateChecker() assert.NoError(t, err) + nodes := []*apiv1.Node{ready1} + snapshot := clustersnapshot.NewBasicClusterSnapshot() + err = snapshot.SetClusterState(nodes, nil) + assert.NoError(t, err) + ctx := context.AutoscalingContext{ CloudProvider: provider, + ClusterSnapshot: snapshot, PredicateChecker: predicateChecker, AutoscalingKubeClients: context.AutoscalingKubeClients{ ListerRegistry: registry, @@ -272,7 +293,7 @@ func TestGetNodeInfosCacheExpired(t *testing.T) { provider.AddNode("ng1", ready1) assert.Equal(t, 2, len(niProcessor1.nodeInfoCache)) - _, err = niProcessor1.Process(&ctx, []*apiv1.Node{ready1}, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) + _, err = niProcessor1.Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) assert.NoError(t, err) assert.Equal(t, 1, len(niProcessor1.nodeInfoCache)) @@ -283,7 +304,7 @@ func TestGetNodeInfosCacheExpired(t *testing.T) { "ng2": {NodeInfo: tni, added: now.Add(-2 * time.Second)}, } assert.Equal(t, 2, len(niProcessor2.nodeInfoCache)) - _, err = niProcessor1.Process(&ctx, []*apiv1.Node{ready1}, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) + _, err = niProcessor1.Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now) assert.NoError(t, err) assert.Equal(t, 2, len(niProcessor2.nodeInfoCache)) diff --git a/cluster-autoscaler/simulator/framework/infos.go b/cluster-autoscaler/simulator/framework/infos.go index c3af45d08258..c9062d170f8e 100644 --- a/cluster-autoscaler/simulator/framework/infos.go +++ 
b/cluster-autoscaler/simulator/framework/infos.go @@ -93,6 +93,23 @@ func (n *NodeInfo) ToScheduler() *schedulerframework.NodeInfo { return n.schedNodeInfo } +// DeepCopy clones the NodeInfo. +func (n *NodeInfo) DeepCopy() *NodeInfo { + var newPods []*PodInfo + for _, podInfo := range n.Pods() { + var newClaims []*resourceapi.ResourceClaim + for _, claim := range podInfo.NeededResourceClaims { + newClaims = append(newClaims, claim.DeepCopy()) + } + newPods = append(newPods, &PodInfo{Pod: podInfo.Pod.DeepCopy(), NeededResourceClaims: newClaims}) + } + var newSlices []*resourceapi.ResourceSlice + for _, slice := range n.LocalResourceSlices { + newSlices = append(newSlices, slice.DeepCopy()) + } + return NewNodeInfo(n.Node().DeepCopy(), newSlices, newPods...) +} + // NewNodeInfo returns a new internal NodeInfo from the provided data. func NewNodeInfo(node *apiv1.Node, slices []*resourceapi.ResourceSlice, pods ...*PodInfo) *NodeInfo { result := &NodeInfo{ diff --git a/cluster-autoscaler/simulator/framework/infos_test.go b/cluster-autoscaler/simulator/framework/infos_test.go index e6f997129253..be92e6f762ea 100644 --- a/cluster-autoscaler/simulator/framework/infos_test.go +++ b/cluster-autoscaler/simulator/framework/infos_test.go @@ -208,6 +208,64 @@ func TestNodeInfo(t *testing.T) { } } +func TestDeepCopyNodeInfo(t *testing.T) { + node := test.BuildTestNode("node", 1000, 1000) + pods := []*PodInfo{ + {Pod: test.BuildTestPod("p1", 80, 0, test.WithNodeName(node.Name))}, + { + Pod: test.BuildTestPod("p2", 80, 0, test.WithNodeName(node.Name)), + NeededResourceClaims: []*resourceapi.ResourceClaim{ + {ObjectMeta: v1.ObjectMeta{Name: "claim1"}, Spec: resourceapi.ResourceClaimSpec{Devices: resourceapi.DeviceClaim{Requests: []resourceapi.DeviceRequest{{Name: "req1"}}}}}, + {ObjectMeta: v1.ObjectMeta{Name: "claim2"}, Spec: resourceapi.ResourceClaimSpec{Devices: resourceapi.DeviceClaim{Requests: []resourceapi.DeviceRequest{{Name: "req2"}}}}}, + }, + }, + } + slices := []*resourceapi.ResourceSlice{ + {ObjectMeta: v1.ObjectMeta{Name: "slice1"}, Spec: resourceapi.ResourceSliceSpec{NodeName: "node"}}, + {ObjectMeta: v1.ObjectMeta{Name: "slice2"}, Spec: resourceapi.ResourceSliceSpec{NodeName: "node"}}, + } + nodeInfo := NewNodeInfo(node, slices, pods...) + + // Verify that the contents are identical after copying. + nodeInfoCopy := nodeInfo.DeepCopy() + if diff := cmp.Diff(nodeInfo, nodeInfoCopy, + cmp.AllowUnexported(schedulerframework.NodeInfo{}, NodeInfo{}, PodInfo{}, podExtraInfo{}), + // We don't care about this field staying the same, and it differs because it's a global counter bumped + // on every AddPod. + cmpopts.IgnoreFields(schedulerframework.NodeInfo{}, "Generation"), + ); diff != "" { + t.Errorf("nodeInfo differs after DeepCopyNodeInfo, diff (-want +got): %s", diff) + } + + // Verify that the object addresses changed in the copy. 
+ if nodeInfo == nodeInfoCopy { + t.Error("nodeInfo address identical after DeepCopyNodeInfo") + } + if nodeInfo.ToScheduler() == nodeInfoCopy.ToScheduler() { + t.Error("schedulerframework.NodeInfo address identical after DeepCopyNodeInfo") + } + for i := range len(nodeInfo.LocalResourceSlices) { + if nodeInfo.LocalResourceSlices[i] == nodeInfoCopy.LocalResourceSlices[i] { + t.Errorf("%d-th LocalResourceSlice address identical after DeepCopyNodeInfo", i) + } + } + for podIndex := range len(pods) { + oldPodInfo := nodeInfo.Pods()[podIndex] + newPodInfo := nodeInfoCopy.Pods()[podIndex] + if oldPodInfo == newPodInfo { + t.Errorf("%d-th PodInfo address identical after DeepCopyNodeInfo", podIndex) + } + if oldPodInfo.Pod == newPodInfo.Pod { + t.Errorf("%d-th PodInfo.Pod address identical after DeepCopyNodeInfo", podIndex) + } + for claimIndex := range len(newPodInfo.NeededResourceClaims) { + if oldPodInfo.NeededResourceClaims[podIndex] == newPodInfo.NeededResourceClaims[podIndex] { + t.Errorf("%d-th PodInfo - %d-th NeededResourceClaim address identical after DeepCopyNodeInfo", podIndex, claimIndex) + } + } + } +} + func testPodInfos(pods []*apiv1.Pod, addClaims bool) []*PodInfo { var result []*PodInfo for _, pod := range pods { diff --git a/cluster-autoscaler/simulator/node_info_utils.go b/cluster-autoscaler/simulator/node_info_utils.go new file mode 100644 index 000000000000..943893db67a3 --- /dev/null +++ b/cluster-autoscaler/simulator/node_info_utils.go @@ -0,0 +1,153 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package simulator + +import ( + "fmt" + "math/rand" + + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/uuid" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" + "k8s.io/autoscaler/cluster-autoscaler/utils/daemonset" + "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + "k8s.io/autoscaler/cluster-autoscaler/utils/labels" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" + "k8s.io/autoscaler/cluster-autoscaler/utils/taints" +) + +type nodeGroupTemplateNodeInfoGetter interface { + Id() string + TemplateNodeInfo() (*framework.NodeInfo, error) +} + +// TemplateNodeInfoFromNodeGroupTemplate returns a template NodeInfo object based on NodeGroup.TemplateNodeInfo(). The template is sanitized, and only +// contains the pods that should appear on a new Node from the same node group (e.g. DaemonSet pods). 
+func TemplateNodeInfoFromNodeGroupTemplate(nodeGroup nodeGroupTemplateNodeInfoGetter, daemonsets []*appsv1.DaemonSet, taintConfig taints.TaintConfig) (*framework.NodeInfo, errors.AutoscalerError) { + id := nodeGroup.Id() + baseNodeInfo, err := nodeGroup.TemplateNodeInfo() + if err != nil { + return nil, errors.ToAutoscalerError(errors.CloudProviderError, err) + } + labels.UpdateDeprecatedLabels(baseNodeInfo.Node().ObjectMeta.Labels) + + return TemplateNodeInfoFromExampleNodeInfo(baseNodeInfo, id, daemonsets, true, taintConfig) +} + +// TemplateNodeInfoFromExampleNodeInfo returns a template NodeInfo object based on a real example NodeInfo from the cluster. The template is sanitized, and only +// contains the pods that should appear on a new Node from the same node group (e.g. DaemonSet pods). +func TemplateNodeInfoFromExampleNodeInfo(example *framework.NodeInfo, nodeGroupId string, daemonsets []*appsv1.DaemonSet, forceDaemonSets bool, taintConfig taints.TaintConfig) (*framework.NodeInfo, errors.AutoscalerError) { + randSuffix := fmt.Sprintf("%d", rand.Int63()) + newNodeNameBase := fmt.Sprintf("template-node-for-%s", nodeGroupId) + + // We need to sanitize the example before determining the DS pods, since taints are checked there, and + // we might need to filter some out during sanitization. + sanitizedExample := sanitizeNodeInfo(example, newNodeNameBase, randSuffix, &taintConfig) + expectedPods, err := podsExpectedOnFreshNode(sanitizedExample, daemonsets, forceDaemonSets, randSuffix) + if err != nil { + return nil, err + } + // No need to sanitize the expected pods again - they either come from sanitizedExample and were sanitized above, + // or were added by podsExpectedOnFreshNode and sanitized there. + return framework.NewNodeInfo(sanitizedExample.Node(), nil, expectedPods...), nil +} + +// FreshNodeInfoFromTemplateNodeInfo duplicates the provided template NodeInfo, returning a fresh NodeInfo that can be injected into the cluster snapshot. +// The NodeInfo is sanitized (names, UIDs are changed, etc.), so that it can be injected along other copies created from the same template. +func FreshNodeInfoFromTemplateNodeInfo(template *framework.NodeInfo, suffix string) *framework.NodeInfo { + // Template node infos should already have taints and pods filtered, so not setting these parameters. 
+ return sanitizeNodeInfo(template, template.Node().Name, suffix, nil) +} + +func sanitizeNodeInfo(nodeInfo *framework.NodeInfo, newNodeNameBase string, namesSuffix string, taintConfig *taints.TaintConfig) *framework.NodeInfo { + freshNodeName := fmt.Sprintf("%s-%s", newNodeNameBase, namesSuffix) + freshNode := sanitizeNode(nodeInfo.Node(), freshNodeName, taintConfig) + result := framework.NewNodeInfo(freshNode, nil) + + for _, podInfo := range nodeInfo.Pods() { + freshPod := sanitizePod(podInfo.Pod, freshNode.Name, namesSuffix) + result.AddPod(&framework.PodInfo{Pod: freshPod}) + } + return result +} + +func sanitizeNode(node *apiv1.Node, newName string, taintConfig *taints.TaintConfig) *apiv1.Node { + newNode := node.DeepCopy() + newNode.UID = uuid.NewUUID() + + newNode.Name = newName + if newNode.Labels == nil { + newNode.Labels = make(map[string]string) + } + newNode.Labels[apiv1.LabelHostname] = newName + + if taintConfig != nil { + newNode.Spec.Taints = taints.SanitizeTaints(newNode.Spec.Taints, *taintConfig) + } + return newNode +} + +func sanitizePod(pod *apiv1.Pod, nodeName, nameSuffix string) *apiv1.Pod { + sanitizedPod := pod.DeepCopy() + sanitizedPod.UID = uuid.NewUUID() + sanitizedPod.Name = fmt.Sprintf("%s-%s", pod.Name, nameSuffix) + sanitizedPod.Spec.NodeName = nodeName + return sanitizedPod +} + +func podsExpectedOnFreshNode(sanitizedExampleNodeInfo *framework.NodeInfo, daemonsets []*appsv1.DaemonSet, forceDaemonSets bool, nameSuffix string) ([]*framework.PodInfo, errors.AutoscalerError) { + var result []*framework.PodInfo + runningDS := make(map[types.UID]bool) + for _, pod := range sanitizedExampleNodeInfo.Pods() { + // Ignore scheduled pods in deletion phase + if pod.DeletionTimestamp != nil { + continue + } + // Add scheduled mirror and DS pods + if pod_util.IsMirrorPod(pod.Pod) || pod_util.IsDaemonSetPod(pod.Pod) { + result = append(result, pod) + } + // Mark DS pods as running + controllerRef := metav1.GetControllerOf(pod) + if controllerRef != nil && controllerRef.Kind == "DaemonSet" { + runningDS[controllerRef.UID] = true + } + } + // Add all pending DS pods if force scheduling DS + if forceDaemonSets { + var pendingDS []*appsv1.DaemonSet + for _, ds := range daemonsets { + if !runningDS[ds.UID] { + pendingDS = append(pendingDS, ds) + } + } + // The provided nodeInfo has to have taints properly sanitized, or this won't work correctly. + daemonPods, err := daemonset.GetDaemonSetPodsForNode(sanitizedExampleNodeInfo, pendingDS) + if err != nil { + return nil, errors.ToAutoscalerError(errors.InternalError, err) + } + for _, pod := range daemonPods { + // There's technically no need to sanitize these pods since they're created from scratch, but + // it's nice to have the same suffix for all names in one sanitized NodeInfo when debugging. + result = append(result, &framework.PodInfo{Pod: sanitizePod(pod.Pod, sanitizedExampleNodeInfo.Node().Name, nameSuffix)}) + } + } + return result, nil +} diff --git a/cluster-autoscaler/simulator/node_info_utils_test.go b/cluster-autoscaler/simulator/node_info_utils_test.go new file mode 100644 index 000000000000..00350eb36947 --- /dev/null +++ b/cluster-autoscaler/simulator/node_info_utils_test.go @@ -0,0 +1,510 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package simulator
+
+import (
+	"fmt"
+	"math/rand"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+	appsv1 "k8s.io/api/apps/v1"
+	apiv1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/autoscaler/cluster-autoscaler/config"
+	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
+	. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
+	"k8s.io/kubernetes/pkg/controller/daemon"
+)
+
+var (
+	ds1 = &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "ds1",
+			Namespace: "ds1-namespace",
+			UID: types.UID("ds1"),
+		},
+	}
+	ds2 = &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "ds2",
+			Namespace: "ds2-namespace",
+			UID: types.UID("ds2"),
+		},
+	}
+	ds3 = &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "ds3",
+			Namespace: "ds3-namespace",
+			UID: types.UID("ds3"),
+		},
+		Spec: appsv1.DaemonSetSpec{
+			Template: apiv1.PodTemplateSpec{
+				Spec: apiv1.PodSpec{
+					NodeSelector: map[string]string{"key": "value"},
+				},
+			},
+		},
+	}
+	testDaemonSets = []*appsv1.DaemonSet{ds1, ds2, ds3}
+)
+
+func TestTemplateNodeInfoFromNodeGroupTemplate(t *testing.T) {
+	exampleNode := BuildTestNode("n", 1000, 10)
+	exampleNode.Spec.Taints = []apiv1.Taint{
+		{Key: taints.ToBeDeletedTaint, Value: "2312532423", Effect: apiv1.TaintEffectNoSchedule},
+	}
+
+	for _, tc := range []struct {
+		testName  string
+		nodeGroup *fakeNodeGroup
+
+		wantPods    []*apiv1.Pod
+		wantCpError bool
+	}{
+		{
+			testName:    "node group error results in an error",
+			nodeGroup:   &fakeNodeGroup{templateNodeInfoErr: fmt.Errorf("test error")},
+			wantCpError: true,
+		},
+		{
+			testName: "simple template with no pods",
+			nodeGroup: &fakeNodeGroup{
+				templateNodeInfoResult: framework.NewNodeInfo(exampleNode, nil),
+			},
+			wantPods: []*apiv1.Pod{
+				buildDSPod(ds1, "n"),
+				buildDSPod(ds2, "n"),
+			},
+		},
+		{
+			testName: "template with all kinds of pods",
+			nodeGroup: &fakeNodeGroup{
+				templateNodeInfoResult: framework.NewNodeInfo(exampleNode, nil,
+					&framework.PodInfo{Pod: BuildScheduledTestPod("p1", 100, 1, "n")},
+					&framework.PodInfo{Pod: BuildScheduledTestPod("p2", 100, 1, "n")},
+					&framework.PodInfo{Pod: SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n"))},
+					&framework.PodInfo{Pod: setDeletionTimestamp(SetMirrorPodSpec(BuildScheduledTestPod("p4", 100, 1, "n")))},
+					&framework.PodInfo{Pod: buildDSPod(ds1, "n")},
+					&framework.PodInfo{Pod: setDeletionTimestamp(buildDSPod(ds2, "n"))},
+				),
+			},
+			wantPods: []*apiv1.Pod{
+				SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")),
+				buildDSPod(ds1, "n"),
+				buildDSPod(ds2, "n"),
+			},
+		},
+	} {
+		t.Run(tc.testName, func(t *testing.T) {
+			templateNodeInfo, err := TemplateNodeInfoFromNodeGroupTemplate(tc.nodeGroup, testDaemonSets, taints.TaintConfig{})
+			if tc.wantCpError {
+				if err == nil || err.Type() != errors.CloudProviderError {
+					t.Fatalf("TemplateNodeInfoFromNodeGroupTemplate(): want CloudProviderError, but got: %v (%T)",
err, err) + } else { + return + } + } + if err != nil { + t.Fatalf("TemplateNodeInfoFromNodeGroupTemplate(): expected no error, but got %v", err) + } + + // Verify that the taints are correctly sanitized. + // Verify that the NodeInfo is sanitized using the node group id as base. + // Pass empty string as nameSuffix so that it's auto-determined from the sanitized templateNodeInfo, because + // TemplateNodeInfoFromNodeGroupTemplate randomizes the suffix. + // Pass non-empty expectedPods to verify that the set of pods is changed as expected (e.g. DS pods added, non-DS/deleted pods removed). + if err := verifyNodeInfoSanitization(tc.nodeGroup.templateNodeInfoResult, templateNodeInfo, tc.wantPods, "template-node-for-"+tc.nodeGroup.id, "", nil); err != nil { + t.Fatalf("TemplateNodeInfoFromExampleNodeInfo(): NodeInfo wasn't properly sanitized: %v", err) + } + }) + } +} + +func TestTemplateNodeInfoFromExampleNodeInfo(t *testing.T) { + exampleNode := BuildTestNode("n", 1000, 10) + exampleNode.Spec.Taints = []apiv1.Taint{ + {Key: taints.ToBeDeletedTaint, Value: "2312532423", Effect: apiv1.TaintEffectNoSchedule}, + } + + testCases := []struct { + name string + pods []*apiv1.Pod + daemonSets []*appsv1.DaemonSet + forceDS bool + + wantPods []*apiv1.Pod + wantError bool + }{ + { + name: "node without any pods", + }, + { + name: "node with non-DS/mirror pods", + pods: []*apiv1.Pod{ + BuildScheduledTestPod("p1", 100, 1, "n"), + BuildScheduledTestPod("p2", 100, 1, "n"), + }, + wantPods: []*apiv1.Pod{}, + }, + { + name: "node with a mirror pod", + pods: []*apiv1.Pod{ + SetMirrorPodSpec(BuildScheduledTestPod("p1", 100, 1, "n")), + }, + wantPods: []*apiv1.Pod{ + SetMirrorPodSpec(BuildScheduledTestPod("p1", 100, 1, "n")), + }, + }, + { + name: "node with a deleted mirror pod", + pods: []*apiv1.Pod{ + SetMirrorPodSpec(BuildScheduledTestPod("p1", 100, 1, "n")), + setDeletionTimestamp(SetMirrorPodSpec(BuildScheduledTestPod("p2", 100, 1, "n"))), + }, + wantPods: []*apiv1.Pod{ + SetMirrorPodSpec(BuildScheduledTestPod("p1", 100, 1, "n")), + }, + }, + { + name: "node with DS pods [forceDS=false, no daemon sets]", + pods: []*apiv1.Pod{ + buildDSPod(ds1, "n"), + setDeletionTimestamp(buildDSPod(ds2, "n")), + }, + wantPods: []*apiv1.Pod{ + buildDSPod(ds1, "n"), + }, + }, + { + name: "node with DS pods [forceDS=false, some daemon sets]", + pods: []*apiv1.Pod{ + buildDSPod(ds1, "n"), + setDeletionTimestamp(buildDSPod(ds2, "n")), + }, + daemonSets: testDaemonSets, + wantPods: []*apiv1.Pod{ + buildDSPod(ds1, "n"), + }, + }, + { + name: "node with a DS pod [forceDS=true, no daemon sets]", + pods: []*apiv1.Pod{ + buildDSPod(ds1, "n"), + setDeletionTimestamp(buildDSPod(ds2, "n")), + }, + wantPods: []*apiv1.Pod{ + buildDSPod(ds1, "n"), + }, + forceDS: true, + }, + { + name: "node with a DS pod [forceDS=true, some daemon sets]", + pods: []*apiv1.Pod{ + buildDSPod(ds1, "n"), + setDeletionTimestamp(buildDSPod(ds2, "n")), + }, + daemonSets: testDaemonSets, + forceDS: true, + wantPods: []*apiv1.Pod{ + buildDSPod(ds1, "n"), + buildDSPod(ds2, "n"), + }, + }, + { + name: "everything together [forceDS=false]", + pods: []*apiv1.Pod{ + BuildScheduledTestPod("p1", 100, 1, "n"), + BuildScheduledTestPod("p2", 100, 1, "n"), + SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")), + setDeletionTimestamp(SetMirrorPodSpec(BuildScheduledTestPod("p4", 100, 1, "n"))), + buildDSPod(ds1, "n"), + setDeletionTimestamp(buildDSPod(ds2, "n")), + }, + daemonSets: testDaemonSets, + wantPods: []*apiv1.Pod{ + SetMirrorPodSpec(BuildScheduledTestPod("p3", 
100, 1, "n")), + buildDSPod(ds1, "n"), + }, + }, + { + name: "everything together [forceDS=true]", + pods: []*apiv1.Pod{ + BuildScheduledTestPod("p1", 100, 1, "n"), + BuildScheduledTestPod("p2", 100, 1, "n"), + SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")), + setDeletionTimestamp(SetMirrorPodSpec(BuildScheduledTestPod("p4", 100, 1, "n"))), + buildDSPod(ds1, "n"), + setDeletionTimestamp(buildDSPod(ds2, "n")), + }, + daemonSets: testDaemonSets, + forceDS: true, + wantPods: []*apiv1.Pod{ + SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")), + buildDSPod(ds1, "n"), + buildDSPod(ds2, "n"), + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + nodeGroupId := "nodeGroupId" + exampleNodeInfo := framework.NewNodeInfo(exampleNode, nil) + for _, pod := range tc.pods { + exampleNodeInfo.AddPod(&framework.PodInfo{Pod: pod}) + } + + templateNodeInfo, err := TemplateNodeInfoFromExampleNodeInfo(exampleNodeInfo, nodeGroupId, tc.daemonSets, tc.forceDS, taints.TaintConfig{}) + if tc.wantError { + if err == nil { + t.Fatal("TemplateNodeInfoFromExampleNodeInfo(): want error, but got nil") + } else { + return + } + } + if err != nil { + t.Fatalf("TemplateNodeInfoFromExampleNodeInfo(): expected no error, but got %v", err) + } + + // Verify that the taints are correctly sanitized. + // Verify that the NodeInfo is sanitized using the node group id as base. + // Pass empty string as nameSuffix so that it's auto-determined from the sanitized templateNodeInfo, because + // TemplateNodeInfoFromExampleNodeInfo randomizes the suffix. + // Pass non-empty expectedPods to verify that the set of pods is changed as expected (e.g. DS pods added, non-DS/deleted pods removed). + if err := verifyNodeInfoSanitization(exampleNodeInfo, templateNodeInfo, tc.wantPods, "template-node-for-"+nodeGroupId, "", nil); err != nil { + t.Fatalf("TemplateNodeInfoFromExampleNodeInfo(): NodeInfo wasn't properly sanitized: %v", err) + } + }) + } +} + +func TestFreshNodeInfoFromTemplateNodeInfo(t *testing.T) { + nodeName := "template-node" + templateNode := BuildTestNode(nodeName, 1000, 1000) + templateNode.Spec.Taints = []apiv1.Taint{ + {Key: "startup-taint", Value: "true", Effect: apiv1.TaintEffectNoSchedule}, + {Key: taints.ToBeDeletedTaint, Value: "2312532423", Effect: apiv1.TaintEffectNoSchedule}, + {Key: "a", Value: "b", Effect: apiv1.TaintEffectNoSchedule}, + } + pods := []*framework.PodInfo{ + {Pod: BuildTestPod("p1", 80, 0, WithNodeName(nodeName))}, + {Pod: BuildTestPod("p2", 80, 0, WithNodeName(nodeName))}, + } + templateNodeInfo := framework.NewNodeInfo(templateNode, nil, pods...) + + suffix := "abc" + freshNodeInfo := FreshNodeInfoFromTemplateNodeInfo(templateNodeInfo, suffix) + // Verify that the taints are not sanitized (they should be sanitized in the template already). + // Verify that the NodeInfo is sanitized using the template Node name as base. 
+ initialTaints := templateNodeInfo.Node().Spec.Taints + if err := verifyNodeInfoSanitization(templateNodeInfo, freshNodeInfo, nil, templateNodeInfo.Node().Name, suffix, initialTaints); err != nil { + t.Fatalf("FreshNodeInfoFromTemplateNodeInfo(): NodeInfo wasn't properly sanitized: %v", err) + } +} + +func TestSanitizeNodeInfo(t *testing.T) { + oldNodeName := "old-node" + basicNode := BuildTestNode(oldNodeName, 1000, 1000) + + labelsNode := basicNode.DeepCopy() + labelsNode.Labels = map[string]string{ + apiv1.LabelHostname: oldNodeName, + "a": "b", + "x": "y", + } + + taintsNode := basicNode.DeepCopy() + taintsNode.Spec.Taints = []apiv1.Taint{ + {Key: "startup-taint", Value: "true", Effect: apiv1.TaintEffectNoSchedule}, + {Key: taints.ToBeDeletedTaint, Value: "2312532423", Effect: apiv1.TaintEffectNoSchedule}, + {Key: "a", Value: "b", Effect: apiv1.TaintEffectNoSchedule}, + } + taintConfig := taints.NewTaintConfig(config.AutoscalingOptions{StartupTaints: []string{"startup-taint"}}) + + taintsLabelsNode := labelsNode.DeepCopy() + taintsLabelsNode.Spec.Taints = taintsNode.Spec.Taints + + pods := []*framework.PodInfo{ + {Pod: BuildTestPod("p1", 80, 0, WithNodeName(oldNodeName))}, + {Pod: BuildTestPod("p2", 80, 0, WithNodeName(oldNodeName))}, + } + + for _, tc := range []struct { + testName string + + nodeInfo *framework.NodeInfo + taintConfig *taints.TaintConfig + + wantTaints []apiv1.Taint + }{ + { + testName: "sanitize node", + nodeInfo: framework.NewTestNodeInfo(basicNode), + }, + { + testName: "sanitize node labels", + nodeInfo: framework.NewTestNodeInfo(labelsNode), + }, + { + testName: "sanitize node taints - disabled", + nodeInfo: framework.NewTestNodeInfo(taintsNode), + taintConfig: nil, + wantTaints: taintsNode.Spec.Taints, + }, + { + testName: "sanitize node taints - enabled", + nodeInfo: framework.NewTestNodeInfo(taintsNode), + taintConfig: &taintConfig, + wantTaints: []apiv1.Taint{{Key: "a", Value: "b", Effect: apiv1.TaintEffectNoSchedule}}, + }, + { + testName: "sanitize pods", + nodeInfo: framework.NewNodeInfo(basicNode, nil, pods...), + }, + { + testName: "sanitize everything", + nodeInfo: framework.NewNodeInfo(taintsLabelsNode, nil, pods...), + taintConfig: &taintConfig, + wantTaints: []apiv1.Taint{{Key: "a", Value: "b", Effect: apiv1.TaintEffectNoSchedule}}, + }, + } { + t.Run(tc.testName, func(t *testing.T) { + newNameBase := "node" + suffix := "abc" + sanitizedNodeInfo := sanitizeNodeInfo(tc.nodeInfo, newNameBase, suffix, tc.taintConfig) + if err := verifyNodeInfoSanitization(tc.nodeInfo, sanitizedNodeInfo, nil, newNameBase, suffix, tc.wantTaints); err != nil { + t.Fatalf("sanitizeNodeInfo(): NodeInfo wasn't properly sanitized: %v", err) + } + }) + } +} + +// verifyNodeInfoSanitization verifies whether sanitizedNodeInfo was correctly sanitized starting from initialNodeInfo, with the provided +// nameBase and nameSuffix. The expected taints aren't auto-determined, so wantTaints should always be provided. +// +// If nameSuffix is an empty string, the suffix will be determined from sanitizedNodeInfo. This is useful if +// the test doesn't know/control the name suffix (e.g. because it's randomized by the tested function). +// +// If expectedPods is nil, the set of pods is expected not to change between initialNodeInfo and sanitizedNodeInfo. If the sanitization is +// expected to change the set of pods, the expected set should be passed to expectedPods. 
+func verifyNodeInfoSanitization(initialNodeInfo, sanitizedNodeInfo *framework.NodeInfo, expectedPods []*apiv1.Pod, nameBase, nameSuffix string, wantTaints []apiv1.Taint) error {
+	if nameSuffix == "" {
+		// Determine the suffix from the provided sanitized NodeInfo - it should be the last part of a dash-separated name.
+		nameParts := strings.Split(sanitizedNodeInfo.Node().Name, "-")
+		if len(nameParts) < 2 {
+			return fmt.Errorf("sanitized NodeInfo name unexpected: want format <nameBase>-<nameSuffix>, got %q", sanitizedNodeInfo.Node().Name)
+		}
+		nameSuffix = nameParts[len(nameParts)-1]
+	}
+	if expectedPods != nil {
+		// If the sanitization is expected to change the set of pods, hack the initial NodeInfo to have the expected pods.
+		// Then we can just compare things pod-by-pod as if the set didn't change.
+		initialNodeInfo = framework.NewNodeInfo(initialNodeInfo.Node(), nil)
+		for _, pod := range expectedPods {
+			initialNodeInfo.AddPod(&framework.PodInfo{Pod: pod})
+		}
+	}
+
+	// Verification below assumes the same set of pods between initialNodeInfo and sanitizedNodeInfo.
+	wantNodeName := fmt.Sprintf("%s-%s", nameBase, nameSuffix)
+	if gotName := sanitizedNodeInfo.Node().Name; gotName != wantNodeName {
+		return fmt.Errorf("want sanitized Node name %q, got %q", wantNodeName, gotName)
+	}
+	if gotUid, oldUid := sanitizedNodeInfo.Node().UID, initialNodeInfo.Node().UID; gotUid == "" || gotUid == oldUid {
+		return fmt.Errorf("sanitized Node UID wasn't randomized - got %q, old UID was %q", gotUid, oldUid)
+	}
+	wantLabels := make(map[string]string)
+	for k, v := range initialNodeInfo.Node().Labels {
+		wantLabels[k] = v
+	}
+	wantLabels[apiv1.LabelHostname] = wantNodeName
+	if diff := cmp.Diff(wantLabels, sanitizedNodeInfo.Node().Labels); diff != "" {
+		return fmt.Errorf("sanitized Node labels unexpected, diff (-want +got): %s", diff)
+	}
+	if diff := cmp.Diff(wantTaints, sanitizedNodeInfo.Node().Spec.Taints); diff != "" {
+		return fmt.Errorf("sanitized Node taints unexpected, diff (-want +got): %s", diff)
+	}
+	if diff := cmp.Diff(initialNodeInfo.Node(), sanitizedNodeInfo.Node(),
+		cmpopts.IgnoreFields(metav1.ObjectMeta{}, "Name", "Labels", "UID"),
+		cmpopts.IgnoreFields(apiv1.NodeSpec{}, "Taints"),
+	); diff != "" {
+		return fmt.Errorf("sanitized Node unexpected diff (-want +got): %s", diff)
+	}
+
+	oldPods := initialNodeInfo.Pods()
+	newPods := sanitizedNodeInfo.Pods()
+	if len(oldPods) != len(newPods) {
+		return fmt.Errorf("want %d pods in sanitized NodeInfo, got %d", len(oldPods), len(newPods))
+	}
+	for i, newPod := range newPods {
+		oldPod := oldPods[i]
+
+		if newPod.Name == oldPod.Name || !strings.HasSuffix(newPod.Name, nameSuffix) {
+			return fmt.Errorf("sanitized Pod name unexpected: want (different than %q, ending in %q), got %q", oldPod.Name, nameSuffix, newPod.Name)
+		}
+		if gotUid, oldUid := newPod.UID, oldPod.UID; gotUid == "" || gotUid == oldUid {
+			return fmt.Errorf("sanitized Pod UID wasn't randomized - got %q, old UID was %q", gotUid, oldUid)
+		}
+		if gotNodeName := newPod.Spec.NodeName; gotNodeName != wantNodeName {
+			return fmt.Errorf("want sanitized Pod.Spec.NodeName %q, got %q", wantNodeName, gotNodeName)
+		}
+		if diff := cmp.Diff(oldPod, newPod,
+			cmpopts.IgnoreFields(metav1.ObjectMeta{}, "Name", "UID"),
+			cmpopts.IgnoreFields(apiv1.PodSpec{}, "NodeName"),
+		); diff != "" {
+			return fmt.Errorf("sanitized Pod unexpected diff (-want +got): %s", diff)
+		}
+	}
+	return nil
+}
+
+func buildDSPod(ds *appsv1.DaemonSet, nodeName string) *apiv1.Pod {
+	pod := daemon.NewPod(ds, nodeName)
+	pod.Name = 
fmt.Sprintf("%s-pod-%d", ds.Name, rand.Int63()) + ptrVal := true + pod.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "DaemonSet", UID: ds.UID, Name: ds.Name, Controller: &ptrVal}, + } + return pod +} + +func setDeletionTimestamp(pod *apiv1.Pod) *apiv1.Pod { + now := metav1.NewTime(time.Now()) + pod.DeletionTimestamp = &now + return pod +} + +type fakeNodeGroup struct { + id string + templateNodeInfoResult *framework.NodeInfo + templateNodeInfoErr error +} + +func (f *fakeNodeGroup) Id() string { + return f.id +} + +func (f *fakeNodeGroup) TemplateNodeInfo() (*framework.NodeInfo, error) { + return f.templateNodeInfoResult, f.templateNodeInfoErr +} diff --git a/cluster-autoscaler/simulator/nodes.go b/cluster-autoscaler/simulator/nodes.go deleted file mode 100644 index c80fe0cbe092..000000000000 --- a/cluster-autoscaler/simulator/nodes.go +++ /dev/null @@ -1,71 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package simulator - -import ( - appsv1 "k8s.io/api/apps/v1" - apiv1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/utils/daemonset" - "k8s.io/autoscaler/cluster-autoscaler/utils/errors" - - pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" -) - -// BuildNodeInfoForNode build a NodeInfo structure for the given node as if the node was just created. 
-func BuildNodeInfoForNode(node *apiv1.Node, scheduledPods []*apiv1.Pod, daemonsets []*appsv1.DaemonSet, forceDaemonSets bool) (*framework.NodeInfo, errors.AutoscalerError) { - nodeInfo := framework.NewNodeInfo(node, nil) - return addExpectedPods(nodeInfo, scheduledPods, daemonsets, forceDaemonSets) -} - -func addExpectedPods(nodeInfo *framework.NodeInfo, scheduledPods []*apiv1.Pod, daemonsets []*appsv1.DaemonSet, forceDaemonSets bool) (*framework.NodeInfo, errors.AutoscalerError) { - runningDS := make(map[types.UID]bool) - for _, pod := range scheduledPods { - // Ignore scheduled pods in deletion phase - if pod.DeletionTimestamp != nil { - continue - } - // Add scheduled mirror and DS pods - if pod_util.IsMirrorPod(pod) || pod_util.IsDaemonSetPod(pod) { - nodeInfo.AddPod(&framework.PodInfo{Pod: pod}) - } - // Mark DS pods as running - controllerRef := metav1.GetControllerOf(pod) - if controllerRef != nil && controllerRef.Kind == "DaemonSet" { - runningDS[controllerRef.UID] = true - } - } - // Add all pending DS pods if force scheduling DS - if forceDaemonSets { - var pendingDS []*appsv1.DaemonSet - for _, ds := range daemonsets { - if !runningDS[ds.UID] { - pendingDS = append(pendingDS, ds) - } - } - daemonPods, err := daemonset.GetDaemonSetPodsForNode(nodeInfo, pendingDS) - if err != nil { - return nil, errors.ToAutoscalerError(errors.InternalError, err) - } - for _, pod := range daemonPods { - nodeInfo.AddPod(pod) - } - } - return nodeInfo, nil -} diff --git a/cluster-autoscaler/simulator/nodes_test.go b/cluster-autoscaler/simulator/nodes_test.go deleted file mode 100644 index b931979de6cb..000000000000 --- a/cluster-autoscaler/simulator/nodes_test.go +++ /dev/null @@ -1,239 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package simulator - -import ( - "strings" - "testing" - "time" - - "github.com/stretchr/testify/assert" - appsv1 "k8s.io/api/apps/v1" - apiv1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/autoscaler/cluster-autoscaler/utils/test" - "k8s.io/kubernetes/pkg/controller/daemon" -) - -func TestBuildNodeInfoForNode(t *testing.T) { - ds1 := &appsv1.DaemonSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ds1", - Namespace: "ds1-namespace", - UID: types.UID("ds1"), - }, - } - - ds2 := &appsv1.DaemonSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ds2", - Namespace: "ds2-namespace", - UID: types.UID("ds2"), - }, - } - - ds3 := &appsv1.DaemonSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ds3", - Namespace: "ds3-namespace", - UID: types.UID("ds3"), - }, - Spec: appsv1.DaemonSetSpec{ - Template: apiv1.PodTemplateSpec{ - Spec: apiv1.PodSpec{ - NodeSelector: map[string]string{"key": "value"}, - }, - }, - }, - } - - testCases := []struct { - name string - node *apiv1.Node - pods []*apiv1.Pod - daemonSets []*appsv1.DaemonSet - forceDS bool - - wantPods []*apiv1.Pod - wantError bool - }{ - { - name: "node without any pods", - node: test.BuildTestNode("n", 1000, 10), - }, - { - name: "node with non-DS/mirror pods", - node: test.BuildTestNode("n", 1000, 10), - pods: []*apiv1.Pod{ - test.BuildScheduledTestPod("p1", 100, 1, "n"), - test.BuildScheduledTestPod("p2", 100, 1, "n"), - }, - }, - { - name: "node with a mirror pod", - node: test.BuildTestNode("n", 1000, 10), - pods: []*apiv1.Pod{ - test.SetMirrorPodSpec(test.BuildScheduledTestPod("p1", 100, 1, "n")), - }, - wantPods: []*apiv1.Pod{ - test.SetMirrorPodSpec(test.BuildScheduledTestPod("p1", 100, 1, "n")), - }, - }, - { - name: "node with a deleted mirror pod", - node: test.BuildTestNode("n", 1000, 10), - pods: []*apiv1.Pod{ - test.SetMirrorPodSpec(test.BuildScheduledTestPod("p1", 100, 1, "n")), - setDeletionTimestamp(test.SetMirrorPodSpec(test.BuildScheduledTestPod("p2", 100, 1, "n"))), - }, - wantPods: []*apiv1.Pod{ - test.SetMirrorPodSpec(test.BuildScheduledTestPod("p1", 100, 1, "n")), - }, - }, - { - name: "node with DS pods [forceDS=false, no daemon sets]", - node: test.BuildTestNode("n", 1000, 10), - pods: []*apiv1.Pod{ - buildDSPod(ds1, "n"), - setDeletionTimestamp(buildDSPod(ds2, "n")), - }, - wantPods: []*apiv1.Pod{ - buildDSPod(ds1, "n"), - }, - }, - { - name: "node with DS pods [forceDS=false, some daemon sets]", - node: test.BuildTestNode("n", 1000, 10), - pods: []*apiv1.Pod{ - buildDSPod(ds1, "n"), - setDeletionTimestamp(buildDSPod(ds2, "n")), - }, - daemonSets: []*appsv1.DaemonSet{ds1, ds2, ds3}, - wantPods: []*apiv1.Pod{ - buildDSPod(ds1, "n"), - }, - }, - { - name: "node with a DS pod [forceDS=true, no daemon sets]", - node: test.BuildTestNode("n", 1000, 10), - pods: []*apiv1.Pod{ - buildDSPod(ds1, "n"), - setDeletionTimestamp(buildDSPod(ds2, "n")), - }, - wantPods: []*apiv1.Pod{ - buildDSPod(ds1, "n"), - }, - forceDS: true, - }, - { - name: "node with a DS pod [forceDS=true, some daemon sets]", - node: test.BuildTestNode("n", 1000, 10), - pods: []*apiv1.Pod{ - buildDSPod(ds1, "n"), - setDeletionTimestamp(buildDSPod(ds2, "n")), - }, - daemonSets: []*appsv1.DaemonSet{ds1, ds2, ds3}, - forceDS: true, - wantPods: []*apiv1.Pod{ - buildDSPod(ds1, "n"), - buildDSPod(ds2, "n"), - }, - }, - { - name: "everything together [forceDS=false]", - node: test.BuildTestNode("n", 1000, 10), - pods: []*apiv1.Pod{ - test.BuildScheduledTestPod("p1", 100, 1, "n"), - 
test.BuildScheduledTestPod("p2", 100, 1, "n"), - test.SetMirrorPodSpec(test.BuildScheduledTestPod("p3", 100, 1, "n")), - setDeletionTimestamp(test.SetMirrorPodSpec(test.BuildScheduledTestPod("p4", 100, 1, "n"))), - buildDSPod(ds1, "n"), - setDeletionTimestamp(buildDSPod(ds2, "n")), - }, - daemonSets: []*appsv1.DaemonSet{ds1, ds2, ds3}, - wantPods: []*apiv1.Pod{ - test.SetMirrorPodSpec(test.BuildScheduledTestPod("p3", 100, 1, "n")), - buildDSPod(ds1, "n"), - }, - }, - { - name: "everything together [forceDS=true]", - node: test.BuildTestNode("n", 1000, 10), - pods: []*apiv1.Pod{ - test.BuildScheduledTestPod("p1", 100, 1, "n"), - test.BuildScheduledTestPod("p2", 100, 1, "n"), - test.SetMirrorPodSpec(test.BuildScheduledTestPod("p3", 100, 1, "n")), - setDeletionTimestamp(test.SetMirrorPodSpec(test.BuildScheduledTestPod("p4", 100, 1, "n"))), - buildDSPod(ds1, "n"), - setDeletionTimestamp(buildDSPod(ds2, "n")), - }, - daemonSets: []*appsv1.DaemonSet{ds1, ds2, ds3}, - forceDS: true, - wantPods: []*apiv1.Pod{ - test.SetMirrorPodSpec(test.BuildScheduledTestPod("p3", 100, 1, "n")), - buildDSPod(ds1, "n"), - buildDSPod(ds2, "n"), - }, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - nodeInfo, err := BuildNodeInfoForNode(tc.node, tc.pods, tc.daemonSets, tc.forceDS) - - if tc.wantError { - assert.Error(t, err) - } else { - assert.NoError(t, err) - assert.Equal(t, nodeInfo.Node(), tc.node) - - // clean pod metadata for comparison purposes - var wantPods, pods []*apiv1.Pod - for _, pod := range tc.wantPods { - wantPods = append(wantPods, cleanPodMetadata(pod)) - } - for _, podInfo := range nodeInfo.Pods() { - pods = append(pods, cleanPodMetadata(podInfo.Pod)) - } - assert.ElementsMatch(t, tc.wantPods, pods) - } - }) - } -} - -func cleanPodMetadata(pod *apiv1.Pod) *apiv1.Pod { - pod.Name = strings.Split(pod.Name, "-")[0] - pod.OwnerReferences = nil - return pod -} - -func buildDSPod(ds *appsv1.DaemonSet, nodeName string) *apiv1.Pod { - pod := daemon.NewPod(ds, nodeName) - pod.Name = ds.Name - ptrVal := true - pod.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ - {Kind: "DaemonSet", UID: ds.UID, Controller: &ptrVal}, - } - return pod -} - -func setDeletionTimestamp(pod *apiv1.Pod) *apiv1.Pod { - now := metav1.NewTime(time.Now()) - pod.DeletionTimestamp = &now - return pod -} diff --git a/cluster-autoscaler/utils/daemonset/daemonset.go b/cluster-autoscaler/utils/daemonset/daemonset.go index 06236ae2443c..dbeab83ad96b 100644 --- a/cluster-autoscaler/utils/daemonset/daemonset.go +++ b/cluster-autoscaler/utils/daemonset/daemonset.go @@ -22,6 +22,7 @@ import ( appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/kubernetes/pkg/controller/daemon" ) @@ -40,6 +41,10 @@ func GetDaemonSetPodsForNode(nodeInfo *framework.NodeInfo, daemonsets []*appsv1. 
if shouldRun { pod := daemon.NewPod(ds, nodeInfo.Node().Name) pod.Name = fmt.Sprintf("%s-pod-%d", ds.Name, rand.Int63()) + ptrVal := true + pod.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "DaemonSet", UID: ds.UID, Name: ds.Name, Controller: &ptrVal}, + } result = append(result, &framework.PodInfo{Pod: pod}) } } diff --git a/cluster-autoscaler/utils/scheduler/scheduler.go b/cluster-autoscaler/utils/scheduler/scheduler.go index 04a6e99e7af7..c63da3cbf437 100644 --- a/cluster-autoscaler/utils/scheduler/scheduler.go +++ b/cluster-autoscaler/utils/scheduler/scheduler.go @@ -23,7 +23,6 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" scheduler_config "k8s.io/kubernetes/pkg/scheduler/apis/config" scheduler_scheme "k8s.io/kubernetes/pkg/scheduler/apis/config/scheme" @@ -79,27 +78,6 @@ func isHugePageResourceName(name apiv1.ResourceName) bool { return strings.HasPrefix(string(name), apiv1.ResourceHugePagesPrefix) } -// DeepCopyTemplateNode copies NodeInfo object used as a template. It changes -// names of UIDs of both node and pods running on it, so that copies can be used -// to represent multiple nodes. -func DeepCopyTemplateNode(nodeTemplate *framework.NodeInfo, suffix string) *framework.NodeInfo { - node := nodeTemplate.Node().DeepCopy() - node.Name = fmt.Sprintf("%s-%s", node.Name, suffix) - node.UID = uuid.NewUUID() - if node.Labels == nil { - node.Labels = make(map[string]string) - } - node.Labels["kubernetes.io/hostname"] = node.Name - nodeInfo := framework.NewNodeInfo(node, nil) - for _, podInfo := range nodeTemplate.Pods() { - pod := podInfo.Pod.DeepCopy() - pod.Name = fmt.Sprintf("%s-%s", podInfo.Pod.Name, suffix) - pod.UID = uuid.NewUUID() - nodeInfo.AddPod(&framework.PodInfo{Pod: pod}) - } - return nodeInfo -} - // ResourceToResourceList returns a resource list of the resource. func ResourceToResourceList(r *schedulerframework.Resource) apiv1.ResourceList { result := apiv1.ResourceList{