From 529f243bd6bec5aeef416314e9da1cb4403bd709 Mon Sep 17 00:00:00 2001 From: Jonathan Innis Date: Wed, 17 Jan 2024 09:23:36 -0800 Subject: [PATCH] test: Add testing for PodDisrputionBudgets in E2E testing (#5480) --- go.mod | 4 +- go.sum | 9 +- test/pkg/environment/common/expectations.go | 88 +++++++++- test/suites/drift/suite_test.go | 169 ++++++++------------ test/suites/expiration/suite_test.go | 132 +++++++-------- test/suites/integration/emptiness_test.go | 6 +- test/suites/interruption/suite_test.go | 2 +- 7 files changed, 225 insertions(+), 185 deletions(-) diff --git a/go.mod b/go.mod index a3df78b3dc95..bc1141a877fb 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( k8s.io/utils v0.0.0-20230726121419-3b25d923346b knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd sigs.k8s.io/controller-runtime v0.16.3 - sigs.k8s.io/karpenter v0.33.1-0.20240112201343-c383004c469a + sigs.k8s.io/karpenter v0.33.1-0.20240116233859-f19e1d8dfbfa ) require ( @@ -110,7 +110,7 @@ require ( k8s.io/cloud-provider v0.29.0 // indirect k8s.io/component-base v0.29.0 // indirect k8s.io/csi-translation-lib v0.29.0 // indirect - k8s.io/klog/v2 v2.110.1 // indirect + k8s.io/klog/v2 v2.120.0 // indirect k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect diff --git a/go.sum b/go.sum index 34cadb7366fc..9be1d57a8d53 100644 --- a/go.sum +++ b/go.sum @@ -114,7 +114,6 @@ github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= -github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= @@ -748,8 +747,8 @@ k8s.io/component-base v0.29.0 h1:T7rjd5wvLnPBV1vC4zWd/iWRbV8Mdxs+nGaoaFzGw3s= k8s.io/component-base v0.29.0/go.mod h1:sADonFTQ9Zc9yFLghpDpmNXEdHyQmFIGbiuZbqAXQ1M= k8s.io/csi-translation-lib v0.29.0 h1:we4X1yUlDikvm5Rv0dwMuPHNw6KwjwsQiAuOPWXha8M= k8s.io/csi-translation-lib v0.29.0/go.mod h1:Cp6t3CNBSm1dXS17V8IImUjkqfIB6KCj8Fs8wf6uyTA= -k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= -k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= +k8s.io/klog/v2 v2.120.0 h1:z+q5mfovBj1fKFxiRzsa2DsJLPIVMk/KFL81LMOfK+8= +k8s.io/klog/v2 v2.120.0/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= @@ -763,8 +762,8 @@ sigs.k8s.io/controller-runtime v0.16.3 h1:2TuvuokmfXvDUamSx1SuAOO3eTyye+47mJCigw sigs.k8s.io/controller-runtime v0.16.3/go.mod h1:j7bialYoSn142nv9sCOJmQgDXQXxnroFU4VnX/brVJ0= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/karpenter v0.33.1-0.20240112201343-c383004c469a h1:EuQ5KFs1PHLfPTGJV+0/EnMkdYBaPgt5CCLKWzwjfGE= -sigs.k8s.io/karpenter v0.33.1-0.20240112201343-c383004c469a/go.mod h1:h/O8acLmwFmYYmDD9b57+Fknlf7gQThuY19l7jpThYs= +sigs.k8s.io/karpenter v0.33.1-0.20240116233859-f19e1d8dfbfa h1:XW2HP9imNJu99aRP1ZCSwZLrgVENCdF6sZ0h4L1jLmI= +sigs.k8s.io/karpenter v0.33.1-0.20240116233859-f19e1d8dfbfa/go.mod h1:ExeDTBVknsbC9x9K4/9gwrTt1Mo9HgMaV1NHPELHQPw= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go index 51751c4c824a..c4ceddd16d74 100644 --- a/test/pkg/environment/common/expectations.go +++ b/test/pkg/environment/common/expectations.go @@ -267,6 +267,16 @@ func (env *Environment) ExpectExists(obj client.Object) { }).WithTimeout(time.Second * 5).Should(Succeed()) } +func (env *Environment) EventuallyExpectBound(pods ...*v1.Pod) { + GinkgoHelper() + Eventually(func(g Gomega) { + for _, pod := range pods { + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(pod), pod)).To(Succeed()) + g.Expect(pod.Spec.NodeName).ToNot(BeEmpty()) + } + }).Should(Succeed()) +} + func (env *Environment) EventuallyExpectHealthy(pods ...*v1.Pod) { GinkgoHelper() env.EventuallyExpectHealthyWithTimeout(-1, pods...) @@ -274,15 +284,16 @@ func (env *Environment) EventuallyExpectHealthy(pods ...*v1.Pod) { func (env *Environment) EventuallyExpectHealthyWithTimeout(timeout time.Duration, pods ...*v1.Pod) { GinkgoHelper() - for _, pod := range pods { - Eventually(func(g Gomega) { + Eventually(func(g Gomega) { + for _, pod := range pods { g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(pod), pod)).To(Succeed()) g.Expect(pod.Status.Conditions).To(ContainElement(And( HaveField("Type", Equal(v1.PodReady)), HaveField("Status", Equal(v1.ConditionTrue)), ))) - }).WithTimeout(timeout).Should(Succeed()) - } + } + }).WithTimeout(timeout).Should(Succeed()) + } func (env *Environment) EventuallyExpectKarpenterRestarted() { @@ -373,6 +384,23 @@ func (env *Environment) EventuallyExpectPendingPodCount(selector labels.Selector }).Should(Succeed()) } +func (env *Environment) EventuallyExpectBoundPodCount(selector labels.Selector, numPods int) []*v1.Pod { + GinkgoHelper() + var res []*v1.Pod + Eventually(func(g Gomega) { + res = []*v1.Pod{} + podList := &v1.PodList{} + g.Expect(env.Client.List(env.Context, podList, client.MatchingLabelsSelector{Selector: selector})).To(Succeed()) + for i := range podList.Items { + if podList.Items[i].Spec.NodeName != "" { + res = append(res, &podList.Items[i]) + } + } + g.Expect(res).To(HaveLen(numPods)) + }).Should(Succeed()) + return res +} + func (env *Environment) EventuallyExpectHealthyPodCount(selector labels.Selector, numPods int) []*v1.Pod { By(fmt.Sprintf("waiting for %d pods matching selector %s to be ready", numPods, selector.String())) GinkgoHelper() @@ -458,6 +486,27 @@ func (env *Environment) ConsistentlyExpectNodeCount(comparator string, count int return lo.ToSlicePtr(nodeList.Items) } +func (env *Environment) ConsistentlyExpectNoDisruptions(nodeCount int, duration string) { + GinkgoHelper() + Consistently(func(g Gomega) { + // Ensure we don't change our NodeClaims + nodeClaimList := &corev1beta1.NodeClaimList{} + g.Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed()) + g.Expect(nodeClaimList.Items).To(HaveLen(nodeCount)) + + nodeList := &v1.NodeList{} + g.Expect(env.Client.List(env, nodeList, client.HasLabels{test.DiscoveryLabel})).To(Succeed()) + g.Expect(nodeList.Items).To(HaveLen(nodeCount)) + + for _, node := range nodeList.Items { + _, ok := lo.Find(node.Spec.Taints, func(t v1.Taint) bool { + return corev1beta1.IsDisruptingTaint(t) + }) + g.Expect(ok).To(BeFalse()) + } + }, duration).Should(Succeed()) +} + func (env *Environment) EventuallyExpectTaintedNodeCount(comparator string, count int) []*v1.Node { GinkgoHelper() By(fmt.Sprintf("waiting for tainted nodes to be %s to %d", comparator, count)) @@ -572,6 +621,7 @@ func (env *Environment) EventuallyExpectCreatedNodeClaimCount(comparator string, } func (env *Environment) EventuallyExpectNodeClaimsReady(nodeClaims ...*corev1beta1.NodeClaim) { + GinkgoHelper() Eventually(func(g Gomega) { for _, nc := range nodeClaims { temp := &corev1beta1.NodeClaim{} @@ -581,6 +631,36 @@ func (env *Environment) EventuallyExpectNodeClaimsReady(nodeClaims ...*corev1bet }).Should(Succeed()) } +func (env *Environment) EventuallyExpectExpired(nodeClaims ...*corev1beta1.NodeClaim) { + GinkgoHelper() + Eventually(func(g Gomega) { + for _, nc := range nodeClaims { + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed()) + g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Expired).IsTrue()).To(BeTrue()) + } + }).Should(Succeed()) +} + +func (env *Environment) EventuallyExpectDrifted(nodeClaims ...*corev1beta1.NodeClaim) { + GinkgoHelper() + Eventually(func(g Gomega) { + for _, nc := range nodeClaims { + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed()) + g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) + } + }).Should(Succeed()) +} + +func (env *Environment) EventuallyExpectEmpty(nodeClaims ...*corev1beta1.NodeClaim) { + GinkgoHelper() + Eventually(func(g Gomega) { + for _, nc := range nodeClaims { + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed()) + g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Empty).IsTrue()).To(BeTrue()) + } + }).Should(Succeed()) +} + func (env *Environment) GetNode(nodeName string) v1.Node { GinkgoHelper() var node v1.Node diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index 3950df033e95..d3d600363c29 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -27,6 +27,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/sets" "sigs.k8s.io/controller-runtime/pkg/client" @@ -71,7 +72,7 @@ var _ = BeforeEach(func() { var _ = AfterEach(func() { env.Cleanup() }) var _ = AfterEach(func() { env.AfterEach() }) -var _ = Describe("Drift", Label("AWS"), func() { +var _ = Describe("Drift", func() { var dep *appsv1.Deployment var selector labels.Selector var numPods int @@ -131,17 +132,17 @@ var _ = Describe("Drift", Label("AWS"), func() { selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) env.ExpectCreated(nodeClass, nodePool, dep) - env.EventuallyExpectCreatedNodeClaimCount("==", 3) - env.EventuallyExpectCreatedNodeCount("==", 3) + nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) + nodes := env.EventuallyExpectCreatedNodeCount("==", 3) env.EventuallyExpectHealthyPodCount(selector, int(numPods)) env.Monitor.Reset() // Reset the monitor so that we can expect a single node to be spun up after expiration // List nodes so that we get any updated information on the nodes. If we don't // we have the potential to over-write any changes Karpenter makes to the nodes. - nodes := env.EventuallyExpectNodeCount("==", 3) // Add a finalizer to each node so that we can stop termination disruptions By("adding finalizers to the nodes to prevent termination") for _, node := range nodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) node.Finalizers = append(node.Finalizers, common.TestingFinalizer) env.ExpectUpdated(node) } @@ -155,15 +156,7 @@ var _ = Describe("Drift", Label("AWS"), func() { nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} env.ExpectUpdated(nodePool) - // Expect that the NodeClaims will all be marked drifted - Eventually(func(g Gomega) { - nodeClaimList := &corev1beta1.NodeClaimList{} - err := env.Client.List(env.Context, nodeClaimList) - g.Expect(err).To(Succeed()) - lo.ForEach(nodeClaimList.Items, func(nc corev1beta1.NodeClaim, _ int) { - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - }) - }).Should(Succeed()) + env.EventuallyExpectDrifted(nodeClaims...) nodes = env.EventuallyExpectTaintedNodeCount("==", 2) @@ -213,8 +206,8 @@ var _ = Describe("Drift", Label("AWS"), func() { selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) env.ExpectCreated(nodeClass, nodePool, dep) - env.EventuallyExpectCreatedNodeClaimCount("==", 3) - env.EventuallyExpectCreatedNodeCount("==", 3) + nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) + nodes := env.EventuallyExpectCreatedNodeCount("==", 3) env.EventuallyExpectHealthyPodCount(selector, int(numPods)) env.Monitor.Reset() // Reset the monitor so that we can expect a single node to be spun up after drift @@ -225,11 +218,9 @@ var _ = Describe("Drift", Label("AWS"), func() { By("spreading the pods to each of the nodes") env.EventuallyExpectHealthyPodCount(selector, 3) - var nodes []*v1.Node // Delete pods from the deployment until each node has one pod. - nodePods := []*v1.Pod{} + var nodePods []*v1.Pod for { - nodes = env.EventuallyExpectNodeCount("==", 3) node, found := lo.Find(nodes, func(n *v1.Node) bool { nodePods = env.ExpectHealthyPodsForNode(n.Name) return len(nodePods) > 1 @@ -238,6 +229,7 @@ var _ = Describe("Drift", Label("AWS"), func() { break } // Set the nodes to unschedulable so that the pods won't reschedule. + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) node.Spec.Unschedulable = true env.ExpectUpdated(node) for _, pod := range nodePods[1:] { @@ -250,9 +242,9 @@ var _ = Describe("Drift", Label("AWS"), func() { env.EventuallyExpectHealthyPodCount(selector, 3) By("cordoning and adding finalizer to the nodes") - nodes = env.EventuallyExpectNodeCount("==", 3) // Add a finalizer to each node so that we can stop termination disruptions for _, node := range nodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) node.Finalizers = append(node.Finalizers, common.TestingFinalizer) // Set nodes as unschedulable so that pod nomination doesn't delay disruption for the second disruption action node.Spec.Unschedulable = true @@ -264,15 +256,7 @@ var _ = Describe("Drift", Label("AWS"), func() { nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} env.ExpectUpdated(nodePool) - // Expect that the NodeClaims will all be marked drifted - Eventually(func(g Gomega) { - nodeClaimList := &corev1beta1.NodeClaimList{} - err := env.Client.List(env.Context, nodeClaimList) - g.Expect(err).To(Succeed()) - lo.ForEach(nodeClaimList.Items, func(nc corev1beta1.NodeClaim, _ int) { - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - }) - }).Should(Succeed()) + env.EventuallyExpectDrifted(nodeClaims...) By("enabling disruption by removing the do not disrupt annotation") pods := env.EventuallyExpectHealthyPodCount(selector, 3) @@ -282,11 +266,8 @@ var _ = Describe("Drift", Label("AWS"), func() { env.ExpectUpdated(pod) } - // List nodes so that we get any updated information on the nodes. If we don't - // we have the potential to over-write any changes Karpenter makes to the nodes. - nodes = env.EventuallyExpectNodeCount("==", 3) - // Mark one node as schedulable so the other two nodes can schedule to this node and delete. + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodes[0]), nodes[0])).To(Succeed()) nodes[0].Spec.Unschedulable = false env.ExpectUpdated(nodes[0]) nodes = env.EventuallyExpectTaintedNodeCount("==", 2) @@ -332,15 +313,15 @@ var _ = Describe("Drift", Label("AWS"), func() { selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) env.ExpectCreated(nodeClass, nodePool, dep) - env.EventuallyExpectCreatedNodeClaimCount("==", 3) - env.EventuallyExpectCreatedNodeCount("==", 3) + nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) + nodes := env.EventuallyExpectCreatedNodeCount("==", 3) env.EventuallyExpectHealthyPodCount(selector, int(numPods)) env.Monitor.Reset() // Reset the monitor so that we can expect a single node to be spun up after drift By("cordoning and adding finalizer to the nodes") - nodes := env.EventuallyExpectNodeCount("==", 3) // Add a finalizer to each node so that we can stop termination disruptions for _, node := range nodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) node.Finalizers = append(node.Finalizers, common.TestingFinalizer) // Set nodes as unschedulable so that pod nomination doesn't delay disruption for the second disruption action env.ExpectUpdated(node) @@ -351,15 +332,7 @@ var _ = Describe("Drift", Label("AWS"), func() { nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} env.ExpectUpdated(nodePool) - // Expect that the NodeClaims will all be marked drifted - Eventually(func(g Gomega) { - nodeClaimList := &corev1beta1.NodeClaimList{} - err := env.Client.List(env.Context, nodeClaimList) - g.Expect(err).To(Succeed()) - lo.ForEach(nodeClaimList.Items, func(nc corev1beta1.NodeClaim, _ int) { - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - }) - }).Should(Succeed()) + env.EventuallyExpectDrifted(nodeClaims...) By("enabling disruption by removing the do not disrupt annotation") pods := env.EventuallyExpectHealthyPodCount(selector, 3) @@ -369,7 +342,6 @@ var _ = Describe("Drift", Label("AWS"), func() { env.ExpectUpdated(pod) } - nodes = env.EventuallyExpectNodeCount("==", 3) // Expect two nodes tainted, and 2 nodes created tainted := env.EventuallyExpectTaintedNodeCount("==", 2) env.EventuallyExpectCreatedNodeCount("==", 2) @@ -410,10 +382,7 @@ var _ = Describe("Drift", Label("AWS"), func() { nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: amdAMI}} env.ExpectCreatedOrUpdated(nodeClass) - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - }).Should(Succeed()) + env.EventuallyExpectDrifted(nodeClaim) delete(pod.Annotations, corev1beta1.DoNotDisruptAnnotationKey) env.ExpectUpdated(pod) @@ -439,11 +408,7 @@ var _ = Describe("Drift", Label("AWS"), func() { nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: amdAMI}} env.ExpectCreatedOrUpdated(nodeClass) - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted)).ToNot(BeNil()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - }).Should(Succeed()) + env.EventuallyExpectDrifted(nodeClaim) delete(pod.Annotations, corev1beta1.DoNotDisruptAnnotationKey) env.ExpectUpdated(pod) @@ -543,12 +508,7 @@ var _ = Describe("Drift", Label("AWS"), func() { nodeClass.Spec.SecurityGroupSelectorTerms = sgTerms env.ExpectCreatedOrUpdated(nodeClass) - By("validating the drifted status condition has propagated") - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted)).ToNot(BeNil()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - }).Should(Succeed()) + env.EventuallyExpectDrifted(nodeClaim) delete(pod.Annotations, corev1beta1.DoNotDisruptAnnotationKey) env.ExpectUpdated(pod) @@ -569,12 +529,7 @@ var _ = Describe("Drift", Label("AWS"), func() { nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{{ID: subnets[1].ID}} env.ExpectCreatedOrUpdated(nodeClass) - By("validating the drifted status condition has propagated") - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted)).ToNot(BeNil()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - }).Should(Succeed()) + env.EventuallyExpectDrifted(nodeClaim) delete(pod.Annotations, corev1beta1.DoNotDisruptAnnotationKey) env.ExpectUpdated(pod) @@ -607,12 +562,7 @@ var _ = Describe("Drift", Label("AWS"), func() { env.ExpectCreatedOrUpdated(updatedNodePool) - By("validating the drifted status condition has propagated") - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted)).ToNot(BeNil()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - }).Should(Succeed()) + env.EventuallyExpectDrifted(nodeClaim) delete(pod.Annotations, corev1beta1.DoNotDisruptAnnotationKey) env.ExpectUpdated(pod) @@ -681,12 +631,7 @@ var _ = Describe("Drift", Label("AWS"), func() { env.ExpectCreatedOrUpdated(updatedNodeClass) - By("validating the drifted status condition has propagated") - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted)).ToNot(BeNil()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - }).Should(Succeed()) + env.EventuallyExpectDrifted(nodeClaim) delete(pod.Annotations, corev1beta1.DoNotDisruptAnnotationKey) env.ExpectUpdated(pod) @@ -732,12 +677,7 @@ var _ = Describe("Drift", Label("AWS"), func() { nodeClass.Spec.InstanceProfile = lo.ToPtr(instanceProfileDriftName) env.ExpectCreatedOrUpdated(nodeClass) - By("validating the drifted status condition has propagated") - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted)).ToNot(BeNil()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - }).Should(Succeed()) + env.EventuallyExpectDrifted(nodeClaim) delete(pod.Annotations, corev1beta1.DoNotDisruptAnnotationKey) env.ExpectUpdated(pod) @@ -773,13 +713,7 @@ var _ = Describe("Drift", Label("AWS"), func() { nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: *parameter.Parameter.Value}} env.ExpectCreatedOrUpdated(nodeClass) - // Should see the nodeClaim has drifted - Eventually(func(g Gomega) { - for _, nodeClaim := range startingNodeClaimState { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - } - }).Should(Succeed()) + env.EventuallyExpectDrifted(startingNodeClaimState...) // Expect nodes To get tainted taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) @@ -829,13 +763,7 @@ var _ = Describe("Drift", Label("AWS"), func() { nodePool.Spec.Template.Spec.StartupTaints = []v1.Taint{{Key: "example.com/taint", Effect: v1.TaintEffectPreferNoSchedule}} env.ExpectCreatedOrUpdated(nodePool) - // Should see the nodeClaim has drifted - Eventually(func(g Gomega) { - for _, nodeClaim := range startingNodeClaimState { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) - } - }).Should(Succeed()) + env.EventuallyExpectDrifted(startingNodeClaimState...) // Expect nodes to be tainted taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) @@ -863,5 +791,50 @@ var _ = Describe("Drift", Label("AWS"), func() { g.Expect(sets.New(nodeClaimUIDs...).IsSuperset(sets.New(startingNodeClaimUIDs...))).To(BeTrue()) }, "2m").Should(Succeed()) }) + It("should not drift any nodes if their PodDisruptionBudgets are unhealthy", func() { + // Create a deployment that contains a readiness probe that will never succeed + // This way, the pod will bind to the node, but the PodDisruptionBudget will never go healthy + var numPods int32 = 2 + dep := coretest.Deployment(coretest.DeploymentOptions{ + Replicas: 2, + PodOptions: coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, + PodAntiRequirements: []v1.PodAffinityTerm{{ + TopologyKey: v1.LabelHostname, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "inflate"}, + }}, + }, + + ReadinessProbe: &v1.Probe{ + ProbeHandler: v1.ProbeHandler{ + HTTPGet: &v1.HTTPGetAction{ + Port: intstr.FromInt32(80), + }, + }, + }, + }, + }) + selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + minAvailable := intstr.FromInt32(numPods - 1) + pdb := coretest.PodDisruptionBudget(coretest.PDBOptions{ + Labels: dep.Spec.Template.Labels, + MinAvailable: &minAvailable, + }) + env.ExpectCreated(dep, nodeClass, nodePool, pdb) + + nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) + env.EventuallyExpectCreatedNodeCount("==", int(numPods)) + + // Expect pods to be bound but not to be ready since we are intentionally failing the readiness check + env.EventuallyExpectBoundPodCount(selector, int(numPods)) + + // Drift the nodeclaims + nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} + env.ExpectUpdated(nodePool) + + env.EventuallyExpectDrifted(nodeClaims...) + env.ConsistentlyExpectNoDisruptions(int(numPods), "1m") + }) }) }) diff --git a/test/suites/expiration/suite_test.go b/test/suites/expiration/suite_test.go index 80f912fe27bf..27a4ea28a4c2 100644 --- a/test/suites/expiration/suite_test.go +++ b/test/suites/expiration/suite_test.go @@ -103,15 +103,15 @@ var _ = Describe("Expiration", func() { selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) env.ExpectCreated(nodeClass, nodePool, dep) - env.EventuallyExpectCreatedNodeClaimCount("==", 3) - env.EventuallyExpectCreatedNodeCount("==", 3) + nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) + nodes := env.EventuallyExpectCreatedNodeCount("==", 3) env.EventuallyExpectHealthyPodCount(selector, int(numPods)) env.Monitor.Reset() // Reset the monitor so that we can expect a single node to be spun up after expiration - nodes := env.EventuallyExpectNodeCount("==", 3) By("adding finalizers to the nodes to prevent termination") // Add a finalizer to each node so that we can stop termination disruptions for _, node := range nodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) node.Finalizers = append(node.Finalizers, common.TestingFinalizer) env.ExpectUpdated(node) } @@ -124,15 +124,7 @@ var _ = Describe("Expiration", func() { nodePool.Spec.Disruption.ExpireAfter = corev1beta1.NillableDuration{Duration: lo.ToPtr(time.Second * 30)} env.ExpectUpdated(nodePool) - // Expect that the NodeClaims will all be marked expired - Eventually(func(g Gomega) { - nodeClaimList := &corev1beta1.NodeClaimList{} - err := env.Client.List(env.Context, nodeClaimList) - g.Expect(err).To(Succeed()) - lo.ForEach(nodeClaimList.Items, func(nc corev1beta1.NodeClaim, _ int) { - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Expired).IsTrue()).To(BeTrue()) - }) - }).Should(Succeed()) + env.EventuallyExpectExpired(nodeClaims...) // Expect that two nodes are tainted. nodes = env.EventuallyExpectTaintedNodeCount("==", 2) @@ -188,8 +180,8 @@ var _ = Describe("Expiration", func() { selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) env.ExpectCreated(nodeClass, nodePool, dep) - env.EventuallyExpectCreatedNodeClaimCount("==", 3) - env.EventuallyExpectCreatedNodeCount("==", 3) + nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) + nodes := env.EventuallyExpectCreatedNodeCount("==", 3) env.EventuallyExpectHealthyPodCount(selector, int(numPods)) env.Monitor.Reset() // Reset the monitor so that we can expect a single node to be spun up after expiration @@ -200,11 +192,9 @@ var _ = Describe("Expiration", func() { By("spreading the pods to each of the nodes") env.EventuallyExpectHealthyPodCount(selector, 3) - var nodes []*v1.Node // Delete pods from the deployment until each node has one pod. - nodePods := []*v1.Pod{} + var nodePods []*v1.Pod for { - nodes = env.EventuallyExpectNodeCount("==", 3) node, found := lo.Find(nodes, func(n *v1.Node) bool { nodePods = env.ExpectHealthyPodsForNode(n.Name) return len(nodePods) > 1 @@ -213,6 +203,7 @@ var _ = Describe("Expiration", func() { break } // Set the nodes to unschedulable so that the pods won't reschedule. + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) node.Spec.Unschedulable = true env.ExpectUpdated(node) for _, pod := range nodePods[1:] { @@ -225,9 +216,9 @@ var _ = Describe("Expiration", func() { env.EventuallyExpectHealthyPodCount(selector, 3) By("cordoning and adding finalizer to the nodes") - nodes = env.EventuallyExpectNodeCount("==", 3) // Add a finalizer to each node so that we can stop termination disruptions for _, node := range nodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) node.Finalizers = append(node.Finalizers, common.TestingFinalizer) // Set nodes as unschedulable so that pod nomination doesn't delay disruption for the second disruption action node.Spec.Unschedulable = true @@ -239,15 +230,7 @@ var _ = Describe("Expiration", func() { nodePool.Spec.Disruption.ExpireAfter = corev1beta1.NillableDuration{Duration: lo.ToPtr(time.Second * 30)} env.ExpectUpdated(nodePool) - // Expect that the NodeClaims will all be marked expired - Eventually(func(g Gomega) { - nodeClaimList := &corev1beta1.NodeClaimList{} - err := env.Client.List(env.Context, nodeClaimList) - g.Expect(err).To(Succeed()) - lo.ForEach(nodeClaimList.Items, func(nc corev1beta1.NodeClaim, _ int) { - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Expired).IsTrue()).To(BeTrue()) - }) - }).Should(Succeed()) + env.EventuallyExpectExpired(nodeClaims...) By("enabling disruption by removing the do not disrupt annotation") pods := env.EventuallyExpectHealthyPodCount(selector, 3) @@ -257,11 +240,8 @@ var _ = Describe("Expiration", func() { env.ExpectUpdated(pod) } - // List nodes so that we get any updated information on the nodes. If we don't - // we have the potential to over-write any changes Karpenter makes to the nodes. - nodes = env.EventuallyExpectNodeCount("==", 3) - // Mark one node as schedulable so the other two nodes can schedule to this node and delete. + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodes[0]), nodes[0])).To(Succeed()) nodes[0].Spec.Unschedulable = false env.ExpectUpdated(nodes[0]) nodes = env.EventuallyExpectTaintedNodeCount("==", 2) @@ -308,15 +288,15 @@ var _ = Describe("Expiration", func() { selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) env.ExpectCreated(nodeClass, nodePool, dep) - env.EventuallyExpectCreatedNodeClaimCount("==", 3) - env.EventuallyExpectCreatedNodeCount("==", 3) + nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) + nodes := env.EventuallyExpectCreatedNodeCount("==", 3) env.EventuallyExpectHealthyPodCount(selector, int(numPods)) env.Monitor.Reset() // Reset the monitor so that we can expect a single node to be spun up after drift By("cordoning and adding finalizer to the nodes") - nodes := env.EventuallyExpectNodeCount("==", 3) // Add a finalizer to each node so that we can stop termination disruptions for _, node := range nodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) node.Finalizers = append(node.Finalizers, common.TestingFinalizer) // Set nodes as unschedulable so that pod nomination doesn't delay disruption for the second disruption action env.ExpectUpdated(node) @@ -327,15 +307,7 @@ var _ = Describe("Expiration", func() { nodePool.Spec.Disruption.ExpireAfter = corev1beta1.NillableDuration{Duration: lo.ToPtr(time.Second * 90)} env.ExpectUpdated(nodePool) - // Expect that the NodeClaims will all be marked expired - Eventually(func(g Gomega) { - nodeClaimList := &corev1beta1.NodeClaimList{} - err := env.Client.List(env.Context, nodeClaimList) - g.Expect(err).To(Succeed()) - lo.ForEach(nodeClaimList.Items, func(nc corev1beta1.NodeClaim, _ int) { - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Expired).IsTrue()).To(BeTrue()) - }) - }).Should(Succeed()) + env.EventuallyExpectExpired(nodeClaims...) By("enabling disruption by removing the do not disrupt annotation") pods := env.EventuallyExpectHealthyPodCount(selector, 3) @@ -345,7 +317,6 @@ var _ = Describe("Expiration", func() { env.ExpectUpdated(pod) } - nodes = env.EventuallyExpectNodeCount("==", 3) // Expect two nodes tainted and two nodes created tainted := env.EventuallyExpectTaintedNodeCount("==", 2) env.EventuallyExpectCreatedNodeCount("==", 2) @@ -392,11 +363,7 @@ var _ = Describe("Expiration", func() { env.EventuallyExpectHealthyPodCount(selector, int(numPods)) env.Monitor.Reset() // Reset the monitor so that we can expect a single node to be spun up after expiration - // Expect that the NodeClaim will get an expired status condition - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Expired).IsTrue()).To(BeTrue()) - }).Should(Succeed()) + env.EventuallyExpectExpired(nodeClaim) // Remove the do-not-disrupt annotation so that the Nodes are now deprovisionable for _, pod := range env.ExpectPodsMatchingSelector(selector) { @@ -460,11 +427,7 @@ var _ = Describe("Expiration", func() { nodePool.Spec.Disruption.ExpireAfter.Duration = lo.ToPtr(time.Minute) env.ExpectUpdated(nodePool) - // Expect that the NodeClaim will get an expired status condition - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Expired).IsTrue()).To(BeTrue()) - }).Should(Succeed()) + env.EventuallyExpectExpired(nodeClaim) // Remove the do-not-disruption annotation so that the Nodes are now deprovisionable for _, pod := range env.ExpectPodsMatchingSelector(selector) { @@ -495,7 +458,7 @@ var _ = Describe("Expiration", func() { env.EventuallyExpectCreatedNodeCount("==", 1) env.EventuallyExpectHealthyPodCount(selector, int(numPods)) }) - Context("Expiration Failure", func() { + Context("Failure", func() { It("should not continue to expire if a node never registers", func() { // Launch a new NodeClaim var numPods int32 = 2 @@ -528,13 +491,7 @@ var _ = Describe("Expiration", func() { } env.ExpectCreatedOrUpdated(nodeClass) - // Should see the NodeClaim has expired - Eventually(func(g Gomega) { - for _, nc := range startingNodeClaimState { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed()) - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Expired).IsTrue()).To(BeTrue()) - } - }).Should(Succeed()) + env.EventuallyExpectExpired(startingNodeClaimState...) // Expect nodes To get tainted taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) @@ -560,7 +517,7 @@ var _ = Describe("Expiration", func() { g.Expect(sets.New(nodeClaimUIDs...).IsSuperset(sets.New(startingNodeClaimUIDs...))).To(BeTrue()) }, "2m").Should(Succeed()) }) - It("should not continue to expiration if a node registers but never becomes initialized", func() { + It("should not continue to expire if a node registers but never becomes initialized", func() { // Set a configuration that will allow us to make a NodeClaim not be initialized nodePool.Spec.Template.Spec.StartupTaints = []v1.Taint{{Key: "example.com/taint", Effect: v1.TaintEffectPreferNoSchedule}} @@ -593,13 +550,7 @@ var _ = Describe("Expiration", func() { } }).Should(Succeed()) - // Should see the NodeClaim has expired - Eventually(func(g Gomega) { - for _, nc := range startingNodeClaimState { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed()) - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Expired).IsTrue()).To(BeTrue()) - } - }).Should(Succeed()) + env.EventuallyExpectExpired(startingNodeClaimState...) // Expect nodes To be tainted taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) @@ -627,5 +578,46 @@ var _ = Describe("Expiration", func() { g.Expect(sets.New(nodeClaimUIDs...).IsSuperset(sets.New(startingNodeClaimUIDs...))).To(BeTrue()) }, "2m").Should(Succeed()) }) + It("should not expire any nodes if their PodDisruptionBudgets are unhealthy", func() { + // Create a deployment that contains a readiness probe that will never succeed + // This way, the pod will bind to the node, but the PodDisruptionBudget will never go healthy + var numPods int32 = 2 + dep := coretest.Deployment(coretest.DeploymentOptions{ + Replicas: 2, + PodOptions: coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, + PodAntiRequirements: []v1.PodAffinityTerm{{ + TopologyKey: v1.LabelHostname, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "inflate"}, + }}, + }, + + ReadinessProbe: &v1.Probe{ + ProbeHandler: v1.ProbeHandler{ + HTTPGet: &v1.HTTPGetAction{ + Port: intstr.FromInt32(80), + }, + }, + }, + }, + }) + selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + minAvailable := intstr.FromInt32(numPods - 1) + pdb := coretest.PodDisruptionBudget(coretest.PDBOptions{ + Labels: dep.Spec.Template.Labels, + MinAvailable: &minAvailable, + }) + env.ExpectCreated(dep, nodeClass, nodePool, pdb) + + nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) + env.EventuallyExpectCreatedNodeCount("==", int(numPods)) + + // Expect pods to be bound but not to be ready since we are intentionally failing the readiness check + env.EventuallyExpectBoundPodCount(selector, int(numPods)) + + env.EventuallyExpectExpired(nodeClaims...) + env.ConsistentlyExpectNoDisruptions(int(numPods), "1m") + }) }) }) diff --git a/test/suites/integration/emptiness_test.go b/test/suites/integration/emptiness_test.go index 424c969c719c..d1ba42d3b18c 100644 --- a/test/suites/integration/emptiness_test.go +++ b/test/suites/integration/emptiness_test.go @@ -49,11 +49,7 @@ var _ = Describe("Emptiness", func() { deployment.Spec.Replicas = ptr.Int32(0) Expect(env.Client.Patch(env, deployment, client.MergeFrom(persisted))).To(Succeed()) - By("waiting for the nodeclaim emptiness status condition to propagate") - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Empty).IsTrue()).To(BeTrue()) - }).Should(Succeed()) + env.EventuallyExpectEmpty(nodeClaim) By("waiting for the nodeclaim to deprovision when past its ConsolidateAfter timeout of 0") nodePool.Spec.Disruption.ConsolidateAfter = &corev1beta1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} diff --git a/test/suites/interruption/suite_test.go b/test/suites/interruption/suite_test.go index 6a26a257c14b..bd338cd39899 100644 --- a/test/suites/interruption/suite_test.go +++ b/test/suites/interruption/suite_test.go @@ -67,7 +67,7 @@ var _ = BeforeEach(func() { var _ = AfterEach(func() { env.Cleanup() }) var _ = AfterEach(func() { env.AfterEach() }) -var _ = Describe("Interruption", Label("AWS"), func() { +var _ = Describe("Interruption", func() { It("should terminate the spot instance and spin-up a new node on spot interruption warning", func() { By("Creating a single healthy node with a healthy deployment") nodePool = coretest.ReplaceRequirements(nodePool, v1.NodeSelectorRequirement{