diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go
index adbc5cdda0ee..2cc37dcdc41f 100644
--- a/test/pkg/environment/common/expectations.go
+++ b/test/pkg/environment/common/expectations.go
@@ -45,6 +45,7 @@ import (
 	pscheduling "sigs.k8s.io/karpenter/pkg/controllers/provisioning/scheduling"
 	"sigs.k8s.io/karpenter/pkg/scheduling"
 	"sigs.k8s.io/karpenter/pkg/test"
+	coreresources "sigs.k8s.io/karpenter/pkg/utils/resources"
 )
 
 func (env *Environment) ExpectCreated(objects ...client.Object) {
@@ -902,3 +903,24 @@ func (env *Environment) GetDaemonSetCount(np *corev1beta1.NodePool) int {
 		return true
 	})
 }
+
+func (env *Environment) GetDaemonSetOverhead(np *corev1beta1.NodePool) v1.ResourceList {
+	GinkgoHelper()
+
+	// Performs the same filtering logic as the scheduler to estimate the daemonset resource requests
+	// that will be scheduled as overhead on each node created for this NodePool
+	daemonSetList := &appsv1.DaemonSetList{}
+	Expect(env.Client.List(env.Context, daemonSetList)).To(Succeed())
+
+	return coreresources.RequestsForPods(lo.FilterMap(daemonSetList.Items, func(ds appsv1.DaemonSet, _ int) (*v1.Pod, bool) {
+		p := &v1.Pod{Spec: ds.Spec.Template.Spec}
+		nodeClaimTemplate := pscheduling.NewNodeClaimTemplate(np)
+		if err := scheduling.Taints(nodeClaimTemplate.Spec.Taints).Tolerates(p); err != nil {
+			return nil, false
+		}
+		if err := nodeClaimTemplate.Requirements.Compatible(scheduling.NewPodRequirements(p), scheduling.AllowUndefinedWellKnownLabels); err != nil {
+			return nil, false
+		}
+		return p, true
+	})...)
+}
diff --git a/test/suites/consolidation/suite_test.go b/test/suites/consolidation/suite_test.go
index 3ed5408c5aa0..2690129cfd69 100644
--- a/test/suites/consolidation/suite_test.go
+++ b/test/suites/consolidation/suite_test.go
@@ -560,7 +560,14 @@ var _ = Describe("Consolidation", func() {
 					},
 				},
 				ResourceRequirements: v1.ResourceRequirements{
-					Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1.5")},
+					Requests: v1.ResourceList{
+						v1.ResourceCPU: func() resource.Quantity {
+							dsOverhead := env.GetDaemonSetOverhead(nodePool)
+							base := lo.ToPtr(resource.MustParse("1800m"))
+							base.Sub(*dsOverhead.Cpu())
+							return *base
+						}(),
+					},
 				},
 			},
 		})
@@ -673,7 +680,14 @@
 					},
 				},
 				ResourceRequirements: v1.ResourceRequirements{
-					Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1.5")},
+					Requests: v1.ResourceList{
+						v1.ResourceCPU: func() resource.Quantity {
+							dsOverhead := env.GetDaemonSetOverhead(nodePool)
+							base := lo.ToPtr(resource.MustParse("1800m"))
+							base.Sub(*dsOverhead.Cpu())
+							return *base
+						}(),
+					},
 				},
 			},
 		})
diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go
index 49ceb84a55d0..bf7fc52b4da4 100644
--- a/test/suites/drift/suite_test.go
+++ b/test/suites/drift/suite_test.go
@@ -272,13 +272,6 @@ var _ = Describe("Drift", func() {
 				Values:   []string{"xlarge"},
 			},
 		},
-		// Add an Exists operator so that we can select on a fake partition later
-		corev1beta1.NodeSelectorRequirementWithMinValues{
-			NodeSelectorRequirement: v1.NodeSelectorRequirement{
-				Key:      "test-partition",
-				Operator: v1.NodeSelectorOpExists,
-			},
-		},
 	)
 	nodePool.Labels = appLabels
 	// We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time.
@@ -286,36 +279,29 @@
 		Nodes: "3",
 	}}
 
-	// Make 5 pods all with different deployments and different test partitions, so that each pod can be put
-	// on a separate node.
-	selector = labels.SelectorFromSet(appLabels)
-	numPods = 5
-	deployments := make([]*appsv1.Deployment, numPods)
-	for i := range lo.Range(numPods) {
-		deployments[i] = coretest.Deployment(coretest.DeploymentOptions{
-			Replicas: 1,
-			PodOptions: coretest.PodOptions{
-				ObjectMeta: metav1.ObjectMeta{
-					Labels: appLabels,
-				},
-				NodeSelector: map[string]string{"test-partition": fmt.Sprintf("%d", i)},
-				// Each xlarge has 4 cpu, so each node should fit no more than 1 pod.
-				ResourceRequirements: v1.ResourceRequirements{
-					Requests: v1.ResourceList{
-						v1.ResourceCPU: resource.MustParse("3"),
-					},
-				},
+	// Create a 5 pod deployment with hostname inter-pod anti-affinity to ensure each pod is placed on a unique node
+	deployment := coretest.Deployment(coretest.DeploymentOptions{
+		Replicas: 5,
+		PodOptions: coretest.PodOptions{
+			ObjectMeta: metav1.ObjectMeta{
+				Labels: appLabels,
 			},
-		})
-	}
+			PodAntiRequirements: []v1.PodAffinityTerm{{
+				TopologyKey: v1.LabelHostname,
+				LabelSelector: &metav1.LabelSelector{
+					MatchLabels: appLabels,
+				},
+			}},
+		},
+	})
 
-	env.ExpectCreated(nodeClass, nodePool, deployments[0], deployments[1], deployments[2], deployments[3], deployments[4])
+	env.ExpectCreated(nodeClass, nodePool, deployment)
 
 	originalNodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 5)
 	originalNodes := env.EventuallyExpectCreatedNodeCount("==", 5)
 
 	// Check that all deployment pods are online
-	env.EventuallyExpectHealthyPodCount(selector, numPods)
+	env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(appLabels), 5)
 
 	By("cordoning and adding finalizer to the nodes")
 	// Add a finalizer to each node so that we can stop termination disruptions
diff --git a/test/suites/expiration/suite_test.go b/test/suites/expiration/suite_test.go
index 168381716de6..7151c43a6a88 100644
--- a/test/suites/expiration/suite_test.go
+++ b/test/suites/expiration/suite_test.go
@@ -354,13 +354,6 @@ var _ = Describe("Expiration", func() {
 				Values:   []string{"xlarge"},
 			},
 		},
-		// Add an Exists operator so that we can select on a fake partition later
-		corev1beta1.NodeSelectorRequirementWithMinValues{
-			NodeSelectorRequirement: v1.NodeSelectorRequirement{
-				Key:      "test-partition",
-				Operator: v1.NodeSelectorOpExists,
-			},
-		},
 	)
 	nodePool.Labels = appLabels
 	// We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time.
@@ -371,33 +364,27 @@
-	// Make 5 pods all with different deployments and different test partitions, so that each pod can be put
-	// on a separate node.
+	// Create a 5 pod deployment with hostname inter-pod anti-affinity to ensure each pod is placed on a unique node
 	selector = labels.SelectorFromSet(appLabels)
-	numPods = 5
-	deployments := make([]*appsv1.Deployment, numPods)
-	for i := range lo.Range(numPods) {
-		deployments[i] = coretest.Deployment(coretest.DeploymentOptions{
-			Replicas: 1,
-			PodOptions: coretest.PodOptions{
-				ObjectMeta: metav1.ObjectMeta{
-					Labels: appLabels,
-				},
-				NodeSelector: map[string]string{"test-partition": fmt.Sprintf("%d", i)},
-				// Each xlarge has 4 cpu, so each node should fit no more than 1 pod.
-				ResourceRequirements: v1.ResourceRequirements{
-					Requests: v1.ResourceList{
-						v1.ResourceCPU: resource.MustParse("3"),
-					},
-				},
+	deployment := coretest.Deployment(coretest.DeploymentOptions{
+		Replicas: 5,
+		PodOptions: coretest.PodOptions{
+			ObjectMeta: metav1.ObjectMeta{
+				Labels: appLabels,
 			},
-		})
-	}
+			PodAntiRequirements: []v1.PodAffinityTerm{{
+				TopologyKey: v1.LabelHostname,
+				LabelSelector: &metav1.LabelSelector{
+					MatchLabels: appLabels,
+				},
+			}},
+		},
+	})
 
-	env.ExpectCreated(nodeClass, nodePool, deployments[0], deployments[1], deployments[2], deployments[3], deployments[4])
+	env.ExpectCreated(nodeClass, nodePool, deployment)
 
 	env.EventuallyExpectCreatedNodeClaimCount("==", 5)
 	nodes := env.EventuallyExpectCreatedNodeCount("==", 5)
 
 	// Check that all daemonsets and deployment pods are online
-	env.EventuallyExpectHealthyPodCount(selector, numPods)
+	env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(appLabels), 5)
 
 	By("cordoning and adding finalizer to the nodes")
 	// Add a finalizer to each node so that we can stop termination disruptions
diff --git a/test/suites/integration/scheduling_test.go b/test/suites/integration/scheduling_test.go
index 9c9dba0f6a26..c22748c70772 100644
--- a/test/suites/integration/scheduling_test.go
+++ b/test/suites/integration/scheduling_test.go
@@ -450,11 +450,39 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() {
 			if version, err := env.GetK8sMinorVersion(0); err != nil || version < 29 {
 				Skip("native sidecar containers are only enabled on EKS 1.29+")
 			}
+
+			labels := map[string]string{"test": test.RandomName()}
+			// Create a buffer pod to even out the total resource requests regardless of the daemonsets on the cluster. Assumes
+			// CPU is the resource in contention and that total daemonset CPU requests <= 3.
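+			// The buffer pod carries the same labels and required pod affinity (hostname topology) as the test pod
+			// below, so both land on one node and that node's buffer-plus-daemonset CPU requests total roughly 3.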
+			dsBufferPod := test.Pod(test.PodOptions{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: labels,
+				},
+				PodRequirements: []v1.PodAffinityTerm{{
+					LabelSelector: &metav1.LabelSelector{
+						MatchLabels: labels,
+					},
+					TopologyKey: v1.LabelHostname,
+				}},
+				ResourceRequirements: v1.ResourceRequirements{
+					Requests: v1.ResourceList{
+						v1.ResourceCPU: func() resource.Quantity {
+							dsOverhead := env.GetDaemonSetOverhead(nodePool)
+							base := lo.ToPtr(resource.MustParse("3"))
+							base.Sub(*dsOverhead.Cpu())
+							return *base
+						}(),
+					},
+				},
+			})
+
 			test.ReplaceRequirements(nodePool, corev1beta1.NodeSelectorRequirementWithMinValues{
 				NodeSelectorRequirement: v1.NodeSelectorRequirement{
 					Key:      v1beta1.LabelInstanceCPU,
 					Operator: v1.NodeSelectorOpIn,
-					Values:   []string{"1", "2"},
+					Values:   []string{"4", "8"},
 				},
 			}, corev1beta1.NodeSelectorRequirementWithMinValues{
 				NodeSelectorRequirement: v1.NodeSelectorRequirement{
@@ -464,15 +492,24 @@
 				},
 			})
 			pod := test.Pod(test.PodOptions{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: labels,
+				},
+				PodRequirements: []v1.PodAffinityTerm{{
+					LabelSelector: &metav1.LabelSelector{
+						MatchLabels: labels,
+					},
+					TopologyKey: v1.LabelHostname,
+				}},
 				InitContainers:       initContainers,
 				ResourceRequirements: containerRequirements,
 			})
-			env.ExpectCreated(nodePool, nodeClass, pod)
+			env.ExpectCreated(nodePool, nodeClass, dsBufferPod, pod)
 			env.EventuallyExpectHealthy(pod)
 			node := env.ExpectCreatedNodeCount("==", 1)[0]
 			Expect(node.ObjectMeta.GetLabels()[v1beta1.LabelInstanceCPU]).To(Equal(expectedNodeCPU))
 		},
-		Entry("sidecar requirements + later init requirements do exceed container requirements", "2", v1.ResourceRequirements{
+		Entry("sidecar requirements + later init requirements do exceed container requirements", "8", v1.ResourceRequirements{
 			Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("400m")},
 		}, ephemeralInitContainer(v1.ResourceRequirements{
 			Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("300m")},
@@ -484,7 +521,7 @@
 		}, ephemeralInitContainer(v1.ResourceRequirements{
 			Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1")},
 		})),
-		Entry("sidecar requirements + later init requirements do not exceed container requirements", "1", v1.ResourceRequirements{
+		Entry("sidecar requirements + later init requirements do not exceed container requirements", "4", v1.ResourceRequirements{
 			Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("400m")},
 		}, ephemeralInitContainer(v1.ResourceRequirements{
 			Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("300m")},
@@ -496,7 +533,7 @@
 		}, ephemeralInitContainer(v1.ResourceRequirements{
 			Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("300m")},
 		})),
-		Entry("init container requirements exceed all later requests", "2", v1.ResourceRequirements{
+		Entry("init container requirements exceed all later requests", "8", v1.ResourceRequirements{
 			Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("400m")},
 		}, v1.Container{
 			RestartPolicy: lo.ToPtr(v1.ContainerRestartPolicyAlways),
diff --git a/test/suites/integration/utilization_test.go b/test/suites/integration/utilization_test.go
index 7599de17d2c8..798093f46c1c 100644
--- a/test/suites/integration/utilization_test.go
+++ b/test/suites/integration/utilization_test.go
@@ -25,6 +25,8 @@ import (
 
 	corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"
"sigs.k8s.io/karpenter/pkg/apis/v1beta1" + "github.com/samber/lo" + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/test/pkg/debug" @@ -49,8 +51,20 @@ var _ = Describe("Utilization", Label(debug.NoWatch), Label(debug.NoEvents), fun }, ) deployment := test.Deployment(test.DeploymentOptions{ - Replicas: 100, - PodOptions: test.PodOptions{ResourceRequirements: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1.5")}}}}) + Replicas: 100, + PodOptions: test.PodOptions{ + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: func() resource.Quantity { + dsOverhead := env.GetDaemonSetOverhead(nodePool) + base := lo.ToPtr(resource.MustParse("1800m")) + base.Sub(*dsOverhead.Cpu()) + return *base + }(), + }, + }, + }, + }) env.ExpectCreated(nodeClass, nodePool, deployment) env.EventuallyExpectHealthyPodCountWithTimeout(time.Minute*10, labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas))