From ce9f7284e41fe15d755d4dc2a6e474b17cec2a32 Mon Sep 17 00:00:00 2001 From: Nick Tran <10810510+njtran@users.noreply.github.com> Date: Mon, 5 Feb 2024 14:22:54 -0800 Subject: [PATCH] test: add budgets replacement tests (#5602) Co-authored-by: Jonathan Innis --- test/pkg/environment/common/expectations.go | 29 +++++- test/suites/consolidation/suite_test.go | 109 +++++++++++++++++++- test/suites/drift/suite_test.go | 82 ++++++++++++++- test/suites/expiration/suite_test.go | 84 ++++++++++++++- 4 files changed, 290 insertions(+), 14 deletions(-) diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go index 64835670b9a9..9d8779de6526 100644 --- a/test/pkg/environment/common/expectations.go +++ b/test/pkg/environment/common/expectations.go @@ -486,6 +486,12 @@ func (env *Environment) ExpectNodeClaimCount(comparator string, count int) { Expect(len(nodeClaimList.Items)).To(BeNumerically(comparator, count)) } +func NodeClaimNames(nodeClaims []*corev1beta1.NodeClaim) []string { + return lo.Map(nodeClaims, func(n *corev1beta1.NodeClaim, index int) string { + return n.Name + }) +} + func NodeNames(nodes []*v1.Node) []string { return lo.Map(nodes, func(n *v1.Node, index int) string { return n.Name @@ -506,21 +512,22 @@ func (env *Environment) ConsistentlyExpectNodeCount(comparator string, count int func (env *Environment) ConsistentlyExpectNoDisruptions(nodeCount int, duration time.Duration) (taintedNodes []*v1.Node) { GinkgoHelper() - return env.ConsistentlyExpectDisruptingNodesWithNodeCount(0, nodeCount, duration) + return env.ConsistentlyExpectDisruptionsWithNodeCount(0, nodeCount, duration) } -func (env *Environment) ConsistentlyExpectDisruptingNodesWithNodeCount(taintedNodeCount int, nodeCount int, duration time.Duration) (taintedNodes []*v1.Node) { +// ConsistentlyExpectDisruptionsWithNodeCount will continually ensure that there are exactly disruptingNodes with totalNodes (including replacements and existing nodes) +func (env *Environment) ConsistentlyExpectDisruptionsWithNodeCount(disruptingNodes, totalNodes int, duration time.Duration) (taintedNodes []*v1.Node) { GinkgoHelper() nodes := []v1.Node{} Consistently(func(g Gomega) { // Ensure we don't change our NodeClaims nodeClaimList := &corev1beta1.NodeClaimList{} g.Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed()) - g.Expect(nodeClaimList.Items).To(HaveLen(nodeCount)) + g.Expect(nodeClaimList.Items).To(HaveLen(totalNodes)) nodeList := &v1.NodeList{} g.Expect(env.Client.List(env, nodeList, client.HasLabels{test.DiscoveryLabel})).To(Succeed()) - g.Expect(nodeList.Items).To(HaveLen(nodeCount)) + g.Expect(nodeList.Items).To(HaveLen(totalNodes)) nodes = lo.Filter(nodeList.Items, func(n v1.Node, _ int) bool { _, ok := lo.Find(n.Spec.Taints, func(t v1.Taint) bool { @@ -528,7 +535,7 @@ func (env *Environment) ConsistentlyExpectDisruptingNodesWithNodeCount(taintedNo }) return ok }) - g.Expect(nodes).To(HaveLen(taintedNodeCount)) + g.Expect(nodes).To(HaveLen(disruptingNodes)) }, duration).Should(Succeed()) return lo.ToSlicePtr(nodes) } @@ -556,6 +563,18 @@ func (env *Environment) EventuallyExpectNodesUntaintedWithTimeout(timeout time.D }).WithTimeout(timeout).Should(Succeed()) } +func (env *Environment) EventuallyExpectNodeClaimCount(comparator string, count int) []*corev1beta1.NodeClaim { + GinkgoHelper() + By(fmt.Sprintf("waiting for nodes to be %s to %d", comparator, count)) + nodeClaimList := &corev1beta1.NodeClaimList{} + Eventually(func(g Gomega) { + 
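// List the NodeClaims labeled for discovery and assert the count satisfies the comparator. +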
g.Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed()) + g.Expect(len(nodeClaimList.Items)).To(BeNumerically(comparator, count), + fmt.Sprintf("expected %d nodeclaims, had %d (%v)", count, len(nodeClaimList.Items), NodeClaimNames(lo.ToSlicePtr(nodeClaimList.Items)))) + }).Should(Succeed()) + return lo.ToSlicePtr(nodeClaimList.Items) +} + func (env *Environment) EventuallyExpectNodeCount(comparator string, count int) []*v1.Node { GinkgoHelper() By(fmt.Sprintf("waiting for nodes to be %s to %d", comparator, count)) diff --git a/test/suites/consolidation/suite_test.go b/test/suites/consolidation/suite_test.go index 540c05bc1093..0c5a9346cfaa 100644 --- a/test/suites/consolidation/suite_test.go +++ b/test/suites/consolidation/suite_test.go @@ -126,7 +126,7 @@ var _ = Describe("Consolidation", func() { // Ensure that we get two nodes tainted, and they have overlap during the drift env.EventuallyExpectTaintedNodeCount("==", 2) - nodes = env.ConsistentlyExpectDisruptingNodesWithNodeCount(2, 5, time.Second*5) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 5, 5*time.Second) // Remove the finalizer from each node so that we can terminate for _, node := range nodes { @@ -139,7 +139,7 @@ var _ = Describe("Consolidation", func() { // This check ensures that we are consolidating nodes at the same time env.EventuallyExpectTaintedNodeCount("==", 2) - nodes = env.ConsistentlyExpectDisruptingNodesWithNodeCount(2, 3, time.Second*5) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 5*time.Second) for _, node := range nodes { Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) @@ -206,9 +206,9 @@ var _ = Describe("Consolidation", func() { nodePool.Spec.Disruption.ConsolidateAfter = nil env.ExpectUpdated(nodePool) - // Ensure that we get two nodes tainted, and they have overlap during the drift + // Ensure that we get two nodes tainted, and they have overlap during consolidation env.EventuallyExpectTaintedNodeCount("==", 2) - nodes = env.ConsistentlyExpectDisruptingNodesWithNodeCount(2, 3, time.Second*5) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 5*time.Second) for _, node := range nodes { Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) @@ -216,6 +216,107 @@ var _ = Describe("Consolidation", func() { env.EventuallyExpectNotFound(nodes[0], nodes[1]) env.ExpectNodeCount("==", 1) }) + It("should respect budgets for non-empty replace consolidation", func() { + appLabels := map[string]string{"app": "large-app"} + // This test will hold consolidation until we are ready to execute it + nodePool.Spec.Disruption.ConsolidateAfter = &corev1beta1.NillableDuration{} + + nodePool = test.ReplaceRequirements(nodePool, + v1.NodeSelectorRequirement{ + Key: v1beta1.LabelInstanceSize, + Operator: v1.NodeSelectorOpIn, + Values: []string{"xlarge", "2xlarge"}, + }, + // Add an Exists operator so that we can select on a fake partition later + v1.NodeSelectorRequirement{ + Key: "test-partition", + Operator: v1.NodeSelectorOpExists, + }, + ) + nodePool.Labels = appLabels + // We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time. + nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{ + Nodes: "3", + }} + + ds := test.DaemonSet(test.DaemonSetOptions{ + Selector: appLabels, + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: appLabels, + }, + // Each 2xlarge has 8 cpu, so each node should fit no more than 1 pod since each node will have. 
+ // an equivalently sized daemonset + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("3"), + }, + }, + }, + }) + + env.ExpectCreated(ds) + + // Make 5 pods all with different deployments and different test partitions, so that each pod can be put + // on a separate node. + selector = labels.SelectorFromSet(appLabels) + numPods = 5 + deployments := make([]*appsv1.Deployment, numPods) + for i := range lo.Range(int(numPods)) { + deployments[i] = test.Deployment(test.DeploymentOptions{ + Replicas: 1, + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: appLabels, + }, + NodeSelector: map[string]string{"test-partition": fmt.Sprintf("%d", i)}, + // Each 2xlarge has 8 cpu, so each node should fit no more than 1 pod since each node will have + // an equivalently sized daemonset. + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("3"), + }, + }, + }, + }) + } + + env.ExpectCreated(nodeClass, nodePool, deployments[0], deployments[1], deployments[2], deployments[3], deployments[4]) + + env.EventuallyExpectCreatedNodeClaimCount("==", 5) + nodes := env.EventuallyExpectCreatedNodeCount("==", 5) + // Check that all daemonsets and deployment pods are online + env.EventuallyExpectHealthyPodCount(selector, int(numPods)*2) + + By("adding finalizers to the nodes") + // Add a finalizer to each node so that we can stop termination disruptions + for _, node := range nodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) + node.Finalizers = append(node.Finalizers, common.TestingFinalizer) + env.ExpectUpdated(node) + } + + // Delete the daemonset so that the nodes can be consolidated to a smaller size + env.ExpectDeleted(ds) + // Check that all deployment pods are online + env.EventuallyExpectHealthyPodCount(selector, int(numPods)) + + By("enabling consolidation") + nodePool.Spec.Disruption.ConsolidateAfter = nil + env.ExpectUpdated(nodePool) + + // Ensure that we get three nodes tainted, and they have overlap during the consolidation + env.EventuallyExpectTaintedNodeCount("==", 3) + env.EventuallyExpectNodeClaimCount("==", 8) + env.EventuallyExpectNodeCount("==", 8) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(3, 8, 5*time.Second) + + for _, node := range nodes { + Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) + } + env.EventuallyExpectNotFound(nodes[0], nodes[1], nodes[2]) + env.ExpectNodeCount("==", 5) + }) It("should not allow consolidation if the budget is fully blocking", func() { // We're going to define a budget that doesn't allow any consolidation to happen nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{ diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index f0cb378e9c56..032880c65680 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -161,7 +161,7 @@ var _ = Describe("Drift", func() { // Ensure that we get two nodes tainted, and they have overlap during the drift env.EventuallyExpectTaintedNodeCount("==", 2) - nodes = env.ConsistentlyExpectDisruptingNodesWithNodeCount(2, 3, 5*time.Second) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 5*time.Second) // Remove the finalizer from each node so that we can terminate for _, node := range nodes { @@ -253,7 +253,7 @@ var _ = Describe("Drift", func() { // Ensure that we get two nodes tainted, and they have overlap during the 
drift env.EventuallyExpectTaintedNodeCount("==", 2) - nodes = env.ConsistentlyExpectDisruptingNodesWithNodeCount(2, 3, time.Minute) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 30*time.Second) By("removing the finalizer from the nodes") Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed()) @@ -263,6 +263,84 @@ var _ = Describe("Drift", func() { // the node should be gone env.EventuallyExpectNotFound(nodes[0], nodes[1]) }) + It("should respect budgets for non-empty replace drift", func() { + appLabels := map[string]string{"app": "large-app"} + + nodePool = coretest.ReplaceRequirements(nodePool, + v1.NodeSelectorRequirement{ + Key: v1beta1.LabelInstanceSize, + Operator: v1.NodeSelectorOpIn, + Values: []string{"xlarge"}, + }, + // Add an Exists operator so that we can select on a fake partition later + v1.NodeSelectorRequirement{ + Key: "test-partition", + Operator: v1.NodeSelectorOpExists, + }, + ) + nodePool.Labels = appLabels + // We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time. + nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{ + Nodes: "3", + }} + + // Make 5 pods all with different deployments and different test partitions, so that each pod can be put + // on a separate node. + selector = labels.SelectorFromSet(appLabels) + numPods = 5 + deployments := make([]*appsv1.Deployment, numPods) + for i := range lo.Range(numPods) { + deployments[i] = coretest.Deployment(coretest.DeploymentOptions{ + Replicas: 1, + PodOptions: coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: appLabels, + }, + NodeSelector: map[string]string{"test-partition": fmt.Sprintf("%d", i)}, + // Each xlarge has 4 cpu, so each node should fit no more than 1 pod. + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("3"), + }, + }, + }, + }) + } + + env.ExpectCreated(nodeClass, nodePool, deployments[0], deployments[1], deployments[2], deployments[3], deployments[4]) + + env.EventuallyExpectCreatedNodeClaimCount("==", 5) + nodes := env.EventuallyExpectCreatedNodeCount("==", 5) + // Check that all deployment pods are online + env.EventuallyExpectHealthyPodCount(selector, numPods) + + By("adding finalizers to the nodes") + // Add a finalizer to each node so that we can stop termination disruptions + for _, node := range nodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) + node.Finalizers = append(node.Finalizers, common.TestingFinalizer) + env.ExpectUpdated(node) + } + + // Check that all deployment pods are online + env.EventuallyExpectHealthyPodCount(selector, numPods) + + By("drifting the nodepool") + nodePool.Spec.Template.Annotations = lo.Assign(nodePool.Spec.Template.Annotations, map[string]string{"test-annotation": "drift"}) + env.ExpectUpdated(nodePool) + + // Ensure that we get three nodes tainted, and they have overlap during the drift + env.EventuallyExpectTaintedNodeCount("==", 3) + env.EventuallyExpectNodeClaimCount("==", 8) + env.EventuallyExpectNodeCount("==", 8) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(3, 8, 5*time.Second) + + for _, node := range nodes { + Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) + } + env.EventuallyExpectNotFound(nodes[0], nodes[1], nodes[2]) + env.ExpectNodeCount("==", 5) + }) It("should not allow drift if the budget is fully blocking", func() { // We're going to define a budget that doesn't allow any drift to happen 
nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{ diff --git a/test/suites/expiration/suite_test.go b/test/suites/expiration/suite_test.go index d583948b800d..accf8200d2db 100644 --- a/test/suites/expiration/suite_test.go +++ b/test/suites/expiration/suite_test.go @@ -174,7 +174,7 @@ var _ = Describe("Expiration", func() { By("expecting only one disruption for 60s") // Expect only one node being disrupted as the other node should continue to be nominated. // As the pod has a 300s pre-stop sleep. - env.ConsistentlyExpectDisruptingNodesWithNodeCount(1, 2, time.Minute) + env.ConsistentlyExpectDisruptionsWithNodeCount(1, 2, time.Minute) }) It("should respect budgets for empty expiration", func() { coretest.ReplaceRequirements(nodePool, @@ -235,7 +235,7 @@ var _ = Describe("Expiration", func() { // Expect that two nodes are tainted. env.EventuallyExpectTaintedNodeCount("==", 2) - nodes = env.ConsistentlyExpectDisruptingNodesWithNodeCount(2, 3, time.Second*5) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 5*time.Second) // Remove finalizers for _, node := range nodes { @@ -332,7 +332,7 @@ var _ = Describe("Expiration", func() { // Ensure that we get two nodes tainted, and they have overlap during the expiration env.EventuallyExpectTaintedNodeCount("==", 2) - nodes = env.ConsistentlyExpectDisruptingNodesWithNodeCount(2, 3, time.Second*5) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 5*time.Second) By("removing the finalizer from the nodes") Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed()) @@ -342,6 +342,84 @@ var _ = Describe("Expiration", func() { // the node should be gone env.EventuallyExpectNotFound(nodes[0], nodes[1]) }) + It("should respect budgets for non-empty replace expiration", func() { + appLabels := map[string]string{"app": "large-app"} + + nodePool = coretest.ReplaceRequirements(nodePool, + v1.NodeSelectorRequirement{ + Key: v1beta1.LabelInstanceSize, + Operator: v1.NodeSelectorOpIn, + Values: []string{"xlarge"}, + }, + // Add an Exists operator so that we can select on a fake partition later + v1.NodeSelectorRequirement{ + Key: "test-partition", + Operator: v1.NodeSelectorOpExists, + }, + ) + nodePool.Labels = appLabels + // We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time. + nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{ + Nodes: "3", + }} + + // Make 5 pods all with different deployments and different test partitions, so that each pod can be put + // on a separate node. + selector = labels.SelectorFromSet(appLabels) + numPods = 5 + deployments := make([]*appsv1.Deployment, numPods) + for i := range lo.Range(numPods) { + deployments[i] = coretest.Deployment(coretest.DeploymentOptions{ + Replicas: 1, + PodOptions: coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: appLabels, + }, + NodeSelector: map[string]string{"test-partition": fmt.Sprintf("%d", i)}, + // Each xlarge has 4 cpu, so each node should fit no more than 1 pod. 
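+ // Each pod below requests 3 vCPU, so a second pod cannot fit on a 4-vCPU node.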
+ ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("3"), + }, + }, + }, + }) + } + + env.ExpectCreated(nodeClass, nodePool, deployments[0], deployments[1], deployments[2], deployments[3], deployments[4]) + + env.EventuallyExpectCreatedNodeClaimCount("==", 5) + nodes := env.EventuallyExpectCreatedNodeCount("==", 5) + // Check that all deployment pods are online + env.EventuallyExpectHealthyPodCount(selector, numPods) + + By("adding finalizers to the nodes") + // Add a finalizer to each node so that we can stop termination disruptions + for _, node := range nodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) + node.Finalizers = append(node.Finalizers, common.TestingFinalizer) + env.ExpectUpdated(node) + } + + // Check that all deployment pods are online + env.EventuallyExpectHealthyPodCount(selector, numPods) + + By("enabling expiration") + nodePool.Spec.Disruption.ExpireAfter = corev1beta1.NillableDuration{Duration: lo.ToPtr(30 * time.Second)} + env.ExpectUpdated(nodePool) + + // Ensure that we get three nodes tainted, and they have overlap during the expiration + env.EventuallyExpectTaintedNodeCount("==", 3) + env.EventuallyExpectNodeClaimCount("==", 8) + env.EventuallyExpectNodeCount("==", 8) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(3, 8, 5*time.Second) + + for _, node := range nodes { + Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) + } + env.EventuallyExpectNotFound(nodes[0], nodes[1], nodes[2]) + env.ExpectNodeCount("==", 5) + }) It("should not allow expiration if the budget is fully blocking", func() { // We're going to define a budget that doesn't allow any expirations to happen nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{