test: fix daemonset sensitive tests #6123

Merged 1 commit on May 2, 2024
Changes from all commits
22 changes: 22 additions & 0 deletions test/pkg/environment/common/expectations.go
@@ -45,6 +45,7 @@ import (
pscheduling "sigs.k8s.io/karpenter/pkg/controllers/provisioning/scheduling"
"sigs.k8s.io/karpenter/pkg/scheduling"
"sigs.k8s.io/karpenter/pkg/test"
coreresources "sigs.k8s.io/karpenter/pkg/utils/resources"
)

func (env *Environment) ExpectCreated(objects ...client.Object) {
@@ -902,3 +903,24 @@ func (env *Environment) GetDaemonSetCount(np *corev1beta1.NodePool) int {
return true
})
}

func (env *Environment) GetDaemonSetOverhead(np *corev1beta1.NodePool) v1.ResourceList {
GinkgoHelper()

// Performs the same filtering logic as the scheduler to determine which daemonset
// pods we estimate will schedule against nodes owned by this NodePool, and returns
// their summed resource requests as the per-node overhead.
daemonSetList := &appsv1.DaemonSetList{}
Expect(env.Client.List(env.Context, daemonSetList)).To(Succeed())

return coreresources.RequestsForPods(lo.FilterMap(daemonSetList.Items, func(ds appsv1.DaemonSet, _ int) (*v1.Pod, bool) {
p := &v1.Pod{Spec: ds.Spec.Template.Spec}
nodeClaimTemplate := pscheduling.NewNodeClaimTemplate(np)
if err := scheduling.Taints(nodeClaimTemplate.Spec.Taints).Tolerates(p); err != nil {
return nil, false
}
if err := nodeClaimTemplate.Requirements.Compatible(scheduling.NewPodRequirements(p), scheduling.AllowUndefinedWellKnownLabels); err != nil {
return nil, false
}
return p, true
})...)
}
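
The consolidation, scheduling, and utilization suites below all consume this helper the same way: take a fixed base CPU quantity for the node size under test and subtract the daemonset overhead, so the pods' effective footprint no longer depends on which daemonsets the cluster happens to run. A minimal sketch of that pattern; the `1800m` base, the `coretest` alias, and the surrounding `env`/`nodePool` variables are illustrative, borrowed from the suites rather than prescribed by this PR:

```go
// Sketch only: size a test pod so that its request plus the cluster's
// daemonset overhead adds up to the same total on every cluster.
dsOverhead := env.GetDaemonSetOverhead(nodePool) // summed requests of daemonsets schedulable on this NodePool
request := resource.MustParse("1800m")           // illustrative base request for the node size under test
request.Sub(*dsOverhead.Cpu())                   // leave room for the daemonset pods on each node

pod := coretest.Pod(coretest.PodOptions{
	ResourceRequirements: v1.ResourceRequirements{
		Requests: v1.ResourceList{v1.ResourceCPU: request},
	},
})
env.ExpectCreated(pod)
```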
17 changes: 15 additions & 2 deletions test/suites/consolidation/suite_test.go
@@ -560,7 +560,14 @@ var _ = Describe("Consolidation", func() {
},
},
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1.5")},
Requests: v1.ResourceList{
v1.ResourceCPU: func() resource.Quantity {
dsOverhead := env.GetDaemonSetOverhead(nodePool)
base := lo.ToPtr(resource.MustParse("1800m"))
base.Sub(*dsOverhead.Cpu())
return *base
}(),
},
},
},
})
@@ -673,7 +680,13 @@ var _ = Describe("Consolidation", func() {
},
},
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1.5")},
Requests: v1.ResourceList{v1.ResourceCPU: func() resource.Quantity {
dsOverhead := env.GetDaemonSetOverhead(nodePool)
base := lo.ToPtr(resource.MustParse("1800m"))
base.Sub(*dsOverhead.Cpu())
return *base
}(),
},
},
},
})
46 changes: 16 additions & 30 deletions test/suites/drift/suite_test.go
@@ -272,50 +272,36 @@ var _ = Describe("Drift", func() {
Values: []string{"xlarge"},
},
},
// Add an Exists operator so that we can select on a fake partition later
corev1beta1.NodeSelectorRequirementWithMinValues{
NodeSelectorRequirement: v1.NodeSelectorRequirement{
Key: "test-partition",
Operator: v1.NodeSelectorOpExists,
},
},
)
nodePool.Labels = appLabels
// We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time.
nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{
Nodes: "3",
}}

// Make 5 pods all with different deployments and different test partitions, so that each pod can be put
// on a separate node.
selector = labels.SelectorFromSet(appLabels)
numPods = 5
deployments := make([]*appsv1.Deployment, numPods)
for i := range lo.Range(numPods) {
deployments[i] = coretest.Deployment(coretest.DeploymentOptions{
Replicas: 1,
PodOptions: coretest.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: appLabels,
},
NodeSelector: map[string]string{"test-partition": fmt.Sprintf("%d", i)},
// Each xlarge has 4 cpu, so each node should fit no more than 1 pod.
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3"),
},
},
// Create a 5 pod deployment with hostname inter-pod anti-affinity to ensure each pod is placed on a unique node
deployment := coretest.Deployment(coretest.DeploymentOptions{
Replicas: 5,
PodOptions: coretest.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: appLabels,
},
})
}
PodAntiRequirements: []v1.PodAffinityTerm{{
TopologyKey: v1.LabelHostname,
LabelSelector: &metav1.LabelSelector{
MatchLabels: appLabels,
},
}},
},
})

env.ExpectCreated(nodeClass, nodePool, deployments[0], deployments[1], deployments[2], deployments[3], deployments[4])
env.ExpectCreated(nodeClass, nodePool, deployment)

originalNodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 5)
originalNodes := env.EventuallyExpectCreatedNodeCount("==", 5)

// Check that all deployment pods are online
env.EventuallyExpectHealthyPodCount(selector, numPods)
env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(appLabels), numPods)

By("cordoning and adding finalizer to the nodes")
// Add a finalizer to each node so that we can stop termination disruptions
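
The drift suite (and the expiration suite below) takes a different approach: instead of sizing each pod to nearly fill a 4-vCPU xlarge so that resource math forces one pod per node, the single 5-replica deployment above uses required inter-pod anti-affinity on the hostname topology key, which spreads the replicas across five nodes without depending on daemonset overhead at all. A sketch of the anti-affinity stanza the `PodAntiRequirements` option is expected to translate into; this is an assumption about the test helper, and `appLabels` comes from the surrounding test:

```go
// Required hostname anti-affinity: no two pods matching appLabels may share a node.
affinity := &v1.Affinity{
	PodAntiAffinity: &v1.PodAntiAffinity{
		RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{{
			TopologyKey:   v1.LabelHostname, // "kubernetes.io/hostname"
			LabelSelector: &metav1.LabelSelector{MatchLabels: appLabels},
		}},
	},
}
_ = affinity // the coretest helper builds an equivalent affinity from PodAntiRequirements
```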
42 changes: 15 additions & 27 deletions test/suites/expiration/suite_test.go
@@ -354,13 +354,6 @@ var _ = Describe("Expiration", func() {
Values: []string{"xlarge"},
},
},
// Add an Exists operator so that we can select on a fake partition later
corev1beta1.NodeSelectorRequirementWithMinValues{
NodeSelectorRequirement: v1.NodeSelectorRequirement{
Key: "test-partition",
Operator: v1.NodeSelectorOpExists,
},
},
)
nodePool.Labels = appLabels
// We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time.
@@ -371,33 +364,28 @@
// Make 5 pods all with different deployments and different test partitions, so that each pod can be put
// on a separate node.
selector = labels.SelectorFromSet(appLabels)
numPods = 5
deployments := make([]*appsv1.Deployment, numPods)
for i := range lo.Range(numPods) {
deployments[i] = coretest.Deployment(coretest.DeploymentOptions{
Replicas: 1,
PodOptions: coretest.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: appLabels,
},
NodeSelector: map[string]string{"test-partition": fmt.Sprintf("%d", i)},
// Each xlarge has 4 cpu, so each node should fit no more than 1 pod.
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3"),
},
},
deployment := coretest.Deployment(coretest.DeploymentOptions{
Replicas: 5,
PodOptions: coretest.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: appLabels,
},
})
}
PodAntiRequirements: []v1.PodAffinityTerm{{
TopologyKey: v1.LabelHostname,
LabelSelector: &metav1.LabelSelector{
MatchLabels: appLabels,
},
}},
},
})

env.ExpectCreated(nodeClass, nodePool, deployments[0], deployments[1], deployments[2], deployments[3], deployments[4])
env.ExpectCreated(nodeClass, nodePool, deployment)

env.EventuallyExpectCreatedNodeClaimCount("==", 5)
nodes := env.EventuallyExpectCreatedNodeCount("==", 5)

// Check that all daemonsets and deployment pods are online
env.EventuallyExpectHealthyPodCount(selector, numPods)
env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(appLabels), numPods)

By("cordoning and adding finalizer to the nodes")
// Add a finalizer to each node so that we can stop termination disruptions
45 changes: 40 additions & 5 deletions test/suites/integration/scheduling_test.go
@@ -450,11 +450,37 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() {
if version, err := env.GetK8sMinorVersion(0); err != nil || version < 29 {
Skip("native sidecar containers are only enabled on EKS 1.29+")
}

labels := map[string]string{"test": test.RandomName()}
// Create a buffer pod to even out the total resource requests regardless of the daemonsets on the cluster. Assumes
// CPU is the resource in contention and that total daemonset CPU requests <= 3.
dsBufferPod := test.Pod(test.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: labels,
},
PodRequirements: []v1.PodAffinityTerm{{
LabelSelector: &metav1.LabelSelector{
MatchLabels: labels,
},
TopologyKey: v1.LabelHostname,
}},
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: func() resource.Quantity {
dsOverhead := env.GetDaemonSetOverhead(nodePool)
base := lo.ToPtr(resource.MustParse("3"))
base.Sub(*dsOverhead.Cpu())
return *base
}(),
},
},
})

test.ReplaceRequirements(nodePool, corev1beta1.NodeSelectorRequirementWithMinValues{
NodeSelectorRequirement: v1.NodeSelectorRequirement{
Key: v1beta1.LabelInstanceCPU,
Operator: v1.NodeSelectorOpIn,
Values: []string{"1", "2"},
Values: []string{"4", "8"},
},
}, corev1beta1.NodeSelectorRequirementWithMinValues{
NodeSelectorRequirement: v1.NodeSelectorRequirement{
@@ -464,15 +490,24 @@
},
})
pod := test.Pod(test.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: labels,
},
PodRequirements: []v1.PodAffinityTerm{{
LabelSelector: &metav1.LabelSelector{
MatchLabels: labels,
},
TopologyKey: v1.LabelHostname,
}},
InitContainers: initContainers,
ResourceRequirements: containerRequirements,
})
env.ExpectCreated(nodePool, nodeClass, pod)
env.ExpectCreated(nodePool, nodeClass, dsBufferPod, pod)
env.EventuallyExpectHealthy(pod)
node := env.ExpectCreatedNodeCount("==", 1)[0]
Expect(node.ObjectMeta.GetLabels()[v1beta1.LabelInstanceCPU]).To(Equal(expectedNodeCPU))
},
Entry("sidecar requirements + later init requirements do exceed container requirements", "2", v1.ResourceRequirements{
Entry("sidecar requirements + later init requirements do exceed container requirements", "8", v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("400m")},
}, ephemeralInitContainer(v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("300m")},
@@ -484,7 +519,7 @@
}, ephemeralInitContainer(v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1")},
})),
Entry("sidecar requirements + later init requirements do not exceed container requirements", "1", v1.ResourceRequirements{
Entry("sidecar requirements + later init requirements do not exceed container requirements", "4", v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("400m")},
}, ephemeralInitContainer(v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("300m")},
@@ -496,7 +531,7 @@
}, ephemeralInitContainer(v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("300m")},
})),
Entry("init container requirements exceed all later requests", "2", v1.ResourceRequirements{
Entry("init container requirements exceed all later requests", "8", v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("400m")},
}, v1.Container{
RestartPolicy: lo.ToPtr(v1.ContainerRestartPolicyAlways),
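
In the scheduling suite the daemonset overhead is absorbed by a dedicated buffer pod rather than by shrinking the test pod itself: the buffer co-schedules with the test pod via required pod affinity on the hostname key and requests `3 - overhead` CPU, so every test node carries a fixed 3 CPU of background requests, and only the sidecar/init/container requests in the table entries decide whether a 4- or 8-vCPU instance is needed (hence the expected values changing from "1"/"2" to "4"/"8"). A sketch of that accounting, assuming CPU is the only contended resource:

```go
// Background requests pinned to 3 CPU per test node, independent of the cluster's daemonsets.
overhead := env.GetDaemonSetOverhead(nodePool)
buffer := resource.MustParse("3")
buffer.Sub(*overhead.Cpu()) // buffer request + daemonset overhead == 3 CPU

// total on node = 3 CPU (background) + the test pod's effective request,
// so expectedNodeCPU in the table entries is now "4" or "8" instead of "1" or "2".
```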
18 changes: 16 additions & 2 deletions test/suites/integration/utilization_test.go
@@ -25,6 +25,8 @@ import (

corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"

"github.com/samber/lo"

"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1"
"github.com/aws/karpenter-provider-aws/test/pkg/debug"

@@ -49,8 +51,20 @@ var _ = Describe("Utilization", Label(debug.NoWatch), Label(debug.NoEvents), func() {
},
)
deployment := test.Deployment(test.DeploymentOptions{
Replicas: 100,
PodOptions: test.PodOptions{ResourceRequirements: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1.5")}}}})
Replicas: 100,
PodOptions: test.PodOptions{
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: func() resource.Quantity {
dsOverhead := env.GetDaemonSetOverhead(nodePool)
base := lo.ToPtr(resource.MustParse("1800m"))
base.Sub(*dsOverhead.Cpu())
return *base
}(),
},
},
},
})

env.ExpectCreated(nodeClass, nodePool, deployment)
env.EventuallyExpectHealthyPodCountWithTimeout(time.Minute*10, labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas))