From 0ddc478fdf69d22c12b3ef7023ac2147ccc078e0 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Tue, 8 Mar 2022 10:58:43 -0600 Subject: [PATCH] add pod affinity/anti-affinity tests that are skip()'d This sets up some groundwork for beginning work on pod affinity/anti-affinity. --- pkg/apis/provisioning/v1alpha5/suite_test.go | 2 +- pkg/cloudprovider/aws/suite_test.go | 46 +- .../provisioning/scheduling/suite_test.go | 718 +++++++++++++++++- 3 files changed, 749 insertions(+), 17 deletions(-) diff --git a/pkg/apis/provisioning/v1alpha5/suite_test.go b/pkg/apis/provisioning/v1alpha5/suite_test.go index dd711c89455d..4227742adfec 100644 --- a/pkg/apis/provisioning/v1alpha5/suite_test.go +++ b/pkg/apis/provisioning/v1alpha5/suite_test.go @@ -191,7 +191,7 @@ var _ = Describe("Validation", func() { It("should fail because DoesNotExists conflicting", func() { for _, op := range []v1.NodeSelectorOperator{v1.NodeSelectorOpIn, v1.NodeSelectorOpNotIn, v1.NodeSelectorOpExists} { provisioner.Spec.Requirements = NewRequirements( - v1.NodeSelectorRequirement{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOperator(op), Values: []string{"test"}}, + v1.NodeSelectorRequirement{Key: v1.LabelTopologyZone, Operator: op, Values: []string{"test"}}, v1.NodeSelectorRequirement{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpDoesNotExist}, ) } diff --git a/pkg/cloudprovider/aws/suite_test.go b/pkg/cloudprovider/aws/suite_test.go index 7a84c446c89f..241276954283 100644 --- a/pkg/cloudprovider/aws/suite_test.go +++ b/pkg/cloudprovider/aws/suite_test.go @@ -207,6 +207,48 @@ var _ = Describe("Allocation", func() { } Expect(nodeNames.Len()).To(Equal(2)) }) + It("should not schedule a non-GPU workload on a node w/GPU", func() { + nodeNames := sets.NewString() + for _, pod := range ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, + test.UnschedulablePod(test.PodOptions{ + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + resources.NvidiaGPU: resource.MustParse("1"), + "cpu": resource.MustParse("31"), + }, + Limits: v1.ResourceList{ + resources.NvidiaGPU: resource.MustParse("1"), + "cpu": resource.MustParse("31"), + }, + }, + }), + // Can't pack onto the same instance due to consuming too much CPU + test.UnschedulablePod(test.PodOptions{ + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + "cpu": resource.MustParse("3"), + }, + Limits: v1.ResourceList{ + "cpu": resource.MustParse("3"), + }, + }, + }), + ) { + node := ExpectScheduled(ctx, env.Client, pod) + + // This test has a GPU workload that nearly maxes out the test instance type. It's intended to ensure + // that the second pod won't get a GPU node since it doesn't require one, even though it's compatible + // with the first pod that does require a GPU. 
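+			// (Assumption for illustration: the p3.8xlarge reported by the test fixture has 32 vCPUs, so the
+			// 31-vCPU GPU pod leaves no room for the 3-vCPU pod and a second, non-GPU node must be launched.)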
+ if _, isGpuPod := pod.Spec.Containers[0].Resources.Requests[resources.NvidiaGPU]; isGpuPod { + Expect(node.Labels).To(HaveKeyWithValue(v1.LabelInstanceTypeStable, "p3.8xlarge")) + } else { + Expect(node.Labels).ToNot(HaveKeyWithValue(v1.LabelInstanceTypeStable, "p3.8xlarge")) + } + nodeNames.Insert(node.Name) + } + Expect(nodeNames.Len()).To(Equal(2)) + }) + It("should launch instances for AWS Neuron resource requests", func() { nodeNames := sets.NewString() for _, pod := range ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, @@ -280,8 +322,8 @@ var _ = Describe("Allocation", func() { pod.Spec.Affinity = &v1.Affinity{NodeAffinity: &v1.NodeAffinity{PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{ { Weight: 1, Preference: v1.NodeSelectorTerm{MatchExpressions: []v1.NodeSelectorRequirement{ - {Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"test-zone-1a"}}, - }}, + {Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"test-zone-1a"}}, + }}, }, }}} pod = ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod)[0] diff --git a/pkg/controllers/provisioning/scheduling/suite_test.go b/pkg/controllers/provisioning/scheduling/suite_test.go index 12fb4836fb39..8a4ec6d714fe 100644 --- a/pkg/controllers/provisioning/scheduling/suite_test.go +++ b/pkg/controllers/provisioning/scheduling/suite_test.go @@ -186,6 +186,12 @@ var _ = Describe("Constraints", func() { node := ExpectScheduled(ctx, env.Client, pod) Expect(node.Labels).To(HaveKeyWithValue(v1.LabelTopologyZone, "test-zone-2")) }) + It("should not schedule nodes with a hostname selector", func() { + pod := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, test.UnschedulablePod( + test.PodOptions{NodeSelector: map[string]string{v1.LabelHostname: "red-node"}}, + ))[0] + ExpectNotScheduled(ctx, env.Client, pod) + }) It("should not schedule the pod if nodeselector unknown", func() { provisioner.Spec.Requirements = v1alpha5.NewRequirements( v1.NodeSelectorRequirement{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"test-zone-1"}}) @@ -468,7 +474,6 @@ var _ = Describe("Topology", func() { ))[0] ExpectNotScheduled(ctx, env.Client, pod) }) - Context("Zonal", func() { It("should balance pods across zones", func() { topology := []v1.TopologySpreadConstraint{{ @@ -502,6 +507,53 @@ var _ = Describe("Topology", func() { ) ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(2, 2)) }) + It("should not violate max-skew when unsat = do not schedule", func() { + Skip("enable after scheduler no longer violates max-skew") + topology := []v1.TopologySpreadConstraint{{ + TopologyKey: v1.LabelTopologyZone, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: labels}, + MaxSkew: 1, + }} + // force this pod onto zone-1 + provisioner.Spec.Requirements = v1alpha5.NewRequirements( + v1.NodeSelectorRequirement{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"test-zone-1"}}) + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, + test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: labels}, TopologySpreadConstraints: topology})) + ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(1)) + + // now only allow scheduling pods on zone-2 and zone-3 + provisioner.Spec.Requirements = v1alpha5.NewRequirements( + 
v1.NodeSelectorRequirement{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"test-zone-2", "test-zone-3"}})
+			ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner,
+				MakePods(10, test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: labels}, TopologySpreadConstraints: topology})...,
+			)
+
+			// max skew of 1, so test-zone-2/3 will get 2 pods each and the remaining 6 pods will fail to schedule
+			ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(1, 2, 2))
+		})
+		It("should violate max-skew when unsat = schedule anyway", func() {
+			topology := []v1.TopologySpreadConstraint{{
+				TopologyKey:       v1.LabelTopologyZone,
+				WhenUnsatisfiable: v1.ScheduleAnyway,
+				LabelSelector:     &metav1.LabelSelector{MatchLabels: labels},
+				MaxSkew:           1,
+			}}
+			provisioner.Spec.Requirements = v1alpha5.NewRequirements(
+				v1.NodeSelectorRequirement{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"test-zone-1"}})
+			ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner,
+				test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: labels}, TopologySpreadConstraints: topology}))
+			ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(1))
+
+			provisioner.Spec.Requirements = v1alpha5.NewRequirements(
+				v1.NodeSelectorRequirement{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"test-zone-2", "test-zone-3"}})
+			ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner,
+				MakePods(10, test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: labels}, TopologySpreadConstraints: topology})...,
+			)
+
+			// max skew of 1 would limit test-zone-2/3 to 2 pods each, but since this is ScheduleAnyway all 10 pods still schedule (5 per zone)
+			ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(1, 5, 5))
+		})
 		It("should only count running/scheduled pods with matching labels scheduled to nodes with a corresponding domain", func() {
 			wrongNamespace := strings.ToLower(randomdata.SillyName())
 			firstNode := test.Node(test.NodeOptions{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{v1.LabelTopologyZone: "test-zone-1"}}})
@@ -543,6 +595,30 @@ var _ = Describe("Topology", func() {
 			)
 			ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(1))
 		})
+		It("should handle a non-self selecting topology", func() {
+			Skip("enable after scheduler handles non-self selecting topology")
+			topology := []v1.TopologySpreadConstraint{{
+				TopologyKey:       v1.LabelHostname,
+				WhenUnsatisfiable: v1.DoNotSchedule,
+				LabelSelector:     &metav1.LabelSelector{MatchLabels: labels},
+				MaxSkew:           1,
+			}}
+			pods := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner,
+				MakePods(5, test.PodOptions{TopologySpreadConstraints: topology})...,
+			)
+			// This is weird, but the topology label selector is used for determining domain counts. The pod that
+			// owns the topology is what the spread actually applies to. In this test case, there are no pods matching
+			// the label selector, so the max skew is zero. This means we can pack all the pods onto the same node since
+			// it doesn't violate the topology spread constraint (i.e. adding new pods doesn't increase skew since the
+			// pods we are adding don't count toward skew). This behavior is called out at
+			// https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/ , though it's not
+			// recommended for users.
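+			// For contrast, a self-selecting constraint (the pattern the other topology tests in this file use)
+			// would give the pods the same labels the constraint selects, e.g. (hypothetical sketch only, not
+			// exercised by this test):
+			//
+			//   MakePods(5, test.PodOptions{
+			//       ObjectMeta:                metav1.ObjectMeta{Labels: labels},
+			//       TopologySpreadConstraints: topology,
+			//   })
+			//
+			// Here the pods deliberately carry no labels, so they never count toward the skew they constrain.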
+ nodeNames := map[string]struct{}{} + for _, p := range pods { + nodeNames[p.Spec.NodeName] = struct{}{} + } + Expect(nodeNames).To(HaveLen(1)) + }) }) Context("Hostname", func() { @@ -576,6 +652,41 @@ var _ = Describe("Topology", func() { ) ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(4)) }) + It("balance multiple deployments with hostname topology spread", func() { + Skip("enable after scheduler doesn't fail when scheduling disparate workloads") + // Issue #1425 + spreadPod := func(appName string) test.PodOptions { + return test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": appName, + }, + }, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: v1.LabelHostname, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": appName}, + }, + }, + }, + } + } + + scheduled := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, + test.UnschedulablePod(spreadPod("app1")), test.UnschedulablePod(spreadPod("app1")), + test.UnschedulablePod(spreadPod("app2")), test.UnschedulablePod(spreadPod("app2"))) + + for _, p := range scheduled { + ExpectScheduled(ctx, env.Client, p) + } + nodes := v1.NodeList{} + Expect(env.Client.List(ctx, &nodes)).To(Succeed()) + // this wasn't part of #1425, but ensures that we launch the minimum number of nodes + Expect(nodes.Items).To(HaveLen(2)) + }) }) Context("Combined Hostname and Zonal Topology", func() { @@ -618,11 +729,11 @@ var _ = Describe("Topology", func() { }) // https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/#interaction-with-node-affinity-and-node-selectors - Context("Combined Zonal Topology and Affinity", func() { + Context("Combined Zonal Topology and Node Affinity", func() { It("should limit spread options by nodeSelector", func() { topology := []v1.TopologySpreadConstraint{{ TopologyKey: v1.LabelTopologyZone, - WhenUnsatisfiable: v1.DoNotSchedule, + WhenUnsatisfiable: v1.ScheduleAnyway, LabelSelector: &metav1.LabelSelector{MatchLabels: labels}, MaxSkew: 1, }} @@ -649,23 +760,36 @@ var _ = Describe("Topology", func() { LabelSelector: &metav1.LabelSelector{MatchLabels: labels}, MaxSkew: 1, }} - ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, append( + + // need to limit the provisioner to only zone-1, zone-2 or else it will know that test-zone-3 has 0 pods and won't violate + // the max-skew + provisioner.Spec.Requirements = v1alpha5.NewRequirements( + v1.NodeSelectorRequirement{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"test-zone-1", "test-zone-2"}}) + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, MakePods(6, test.PodOptions{ ObjectMeta: metav1.ObjectMeta{Labels: labels}, TopologySpreadConstraints: topology, NodeRequirements: []v1.NodeSelectorRequirement{{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{ "test-zone-1", "test-zone-2", }}}, - }), - MakePods(1, test.PodOptions{ - ObjectMeta: metav1.ObjectMeta{Labels: labels}, - TopologySpreadConstraints: topology, - NodeRequirements: []v1.NodeSelectorRequirement{{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpNotIn, Values: []string{ - "test-zone-2", "test-zone-3", - }}}, - })..., - )...) - ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(4, 3)) + })...) 
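+			// 6 pods spread across the two allowed zones with a max-skew of 1 should land 3 per zone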
+ ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(3, 3)) + + // open the provisioner back to up so it can see all zones again + provisioner.Spec.Requirements = v1alpha5.NewRequirements( + v1.NodeSelectorRequirement{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"test-zone-1", "test-zone-2", "test-zone-3"}}) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, MakePods(1, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels}, + TopologySpreadConstraints: topology, + NodeRequirements: []v1.NodeSelectorRequirement{{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{ + "test-zone-2", "test-zone-3", + }}}, + })...) + + // it will schedule on the currently empty zone-3 even though max-skew is violated as it improves max-skew + ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(3, 3, 1)) + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, MakePods(5, test.PodOptions{ ObjectMeta: metav1.ObjectMeta{Labels: labels}, @@ -675,6 +799,515 @@ var _ = Describe("Topology", func() { ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(4, 4, 4)) }) }) + + Context("Pod Affinity", func() { + It("should schedule a pod with empty pod affinity and anti-affinity", func() { + Skip("enable after pod-affinity is finished") + ExpectCreated(ctx, env.Client) + pod := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, test.UnschedulablePod(test.PodOptions{ + PodRequirements: []v1.PodAffinityTerm{}, + PodAntiRequirements: []v1.PodAffinityTerm{}, + }))[0] + ExpectScheduled(ctx, env.Client, pod) + }) + It("should respect pod affinity", func() { + Skip("enable after pod-affinity is finished") + topology := []v1.TopologySpreadConstraint{{ + TopologyKey: v1.LabelHostname, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: labels}, + MaxSkew: 1, + }} + + affLabels := map[string]string{"security": "s2"} + + affPod1 := test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: affLabels}}) + // affPod2 will try to get scheduled with affPod1 + affPod2 := test.UnschedulablePod(test.PodOptions{PodRequirements: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: affLabels, + }, + TopologyKey: v1.LabelHostname, + }}}) + + var pods []*v1.Pod + pods = append(pods, MakePods(10, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels}, + TopologySpreadConstraints: topology, + })...) + pods = append(pods, affPod1) + pods = append(pods, affPod2) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pods...) + n1 := ExpectScheduled(ctx, env.Client, affPod1) + n2 := ExpectScheduled(ctx, env.Client, affPod2) + // should be scheduled on the same node + Expect(n1.Name).To(Equal(n2.Name)) + }) + It("should respect self pod affinity", func() { + Skip("enable after pod-affinity is finished") + affLabels := map[string]string{"security": "s2"} + + pods := MakePods(3, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: affLabels, + }, + PodRequirements: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: affLabels, + }, + TopologyKey: v1.LabelHostname, + }}, + }) + + pods = ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pods...) 
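+			// every pod both carries the "security: s2" label and requires affinity to it on the hostname
+			// topology, so all three should be packed onto a single node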
+ nodeNames := map[string]struct{}{} + for _, p := range pods { + n := ExpectScheduled(ctx, env.Client, p) + nodeNames[n.Name] = struct{}{} + } + Expect(len(nodeNames)).To(Equal(1)) + }) + It("should allow violation of preferred pod affinity", func() { + Skip("enable after pod-affinity is finished") + topology := []v1.TopologySpreadConstraint{{ + TopologyKey: v1.LabelHostname, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: labels}, + MaxSkew: 1, + }} + + affPod2 := test.UnschedulablePod(test.PodOptions{PodPreferences: []v1.WeightedPodAffinityTerm{{ + Weight: 50, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"security": "s2"}, + }, + TopologyKey: v1.LabelHostname, + }, + }}}) + + var pods []*v1.Pod + pods = append(pods, MakePods(10, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels}, + TopologySpreadConstraints: topology, + })...) + + pods = append(pods, affPod2) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pods...) + // should be scheduled as the pod it has affinity doesn't exist, but it's only a preference and not a + // hard constraints + ExpectScheduled(ctx, env.Client, affPod2) + + }) + It("should allow violation of preferred pod anti-affinity", func() { + Skip("enable after pod-affinity is finished") + affPods := MakePods(10, test.PodOptions{PodAntiPreferences: []v1.WeightedPodAffinityTerm{ + { + Weight: 50, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + TopologyKey: v1.LabelTopologyZone, + }, + }, + }}) + + var pods []*v1.Pod + pods = append(pods, MakePods(3, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{{ + TopologyKey: v1.LabelTopologyZone, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: labels}, + MaxSkew: 1, + }}, + })...) + + pods = append(pods, affPods...) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pods...) 
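+			// the anti-affinity term is only a preference, so even if it ends up being violated every pod
+			// should still schedule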
+ for _, aff := range affPods { + ExpectScheduled(ctx, env.Client, aff) + } + + }) + It("should separate nodes using simple pod anti-affinity on hostname", func() { + Skip("enable after pod-affinity is finished") + affLabels := map[string]string{"security": "s2"} + + affPod1 := test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: affLabels}}) + // affPod2 will avoid affPod1 + affPod2 := test.UnschedulablePod(test.PodOptions{PodAntiRequirements: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: affLabels, + }, + TopologyKey: v1.LabelHostname, + }}}) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, affPod1, affPod2) + n1 := ExpectScheduled(ctx, env.Client, affPod1) + n2 := ExpectScheduled(ctx, env.Client, affPod2) + // should not be scheduled on the same node + Expect(n1.Name).ToNot(Equal(n2.Name)) + }) + It("should choose the node with the highest weight when using multiple weighted preferences", func() { + Skip("enable after pod-affinity is finished") + dbLabels := map[string]string{"type": "db", "spread": "spread"} + webLabels := map[string]string{"type": "web", "spread": "spread"} + cacheLabels := map[string]string{"type": "cache", "spread": "spread"} + + // ensure our three target pods are spread across nodes + tsc := []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: v1.LabelHostname, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"spread": "spread"}, + }, + }, + } + + var targetPods []*v1.Pod + // 50 pods we can land on, but prefer not to + targetPods = append(targetPods, MakePods(25, test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: dbLabels}, TopologySpreadConstraints: tsc})...) + targetPods = append(targetPods, MakePods(25, test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: cacheLabels}, TopologySpreadConstraints: tsc})...) + // one pod we prefer with the highest weight + targetPods = append(targetPods, test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: webLabels}, TopologySpreadConstraints: tsc})) + // and the pod that wants to land on the web node + targetPods = append(targetPods, test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Name: "affinity-pod"}, + PodPreferences: []v1.WeightedPodAffinityTerm{ + { + Weight: 25, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: dbLabels, + }, + TopologyKey: v1.LabelHostname, + }, + }, + { + Weight: 50, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: webLabels, + }, + TopologyKey: v1.LabelHostname, + }, + }, + { + Weight: 49, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: cacheLabels, + }, + TopologyKey: v1.LabelHostname, + }, + }, + }})) + + pods := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, targetPods...) 
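+			// the "web" pod carries the highest preference weight (50), so the affinity pod is expected to land
+			// on the same node as it rather than on a db (weight 25) or cache (weight 49) node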
+ var webNodeName string + var affNodeName string + for _, p := range pods { + ExpectScheduled(ctx, env.Client, p) + if p.Labels["type"] == "web" { + webNodeName = p.Spec.NodeName + } else if _, ok := p.Labels["type"]; !ok { + affNodeName = p.Spec.NodeName + } + } + Expect(webNodeName).To(Equal(affNodeName)) + }) + It("should allow violation of a pod affinity preference with a conflicting required constraint", func() { + Skip("enable after pod-affinity is finished") + affLabels := map[string]string{"security": "s2"} + + constraint := v1.TopologySpreadConstraint{ + MaxSkew: 1, + TopologyKey: v1.LabelHostname, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + } + affPod1 := test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: affLabels}}) + affPods := MakePods(3, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels}, + // limit these pods to one per host + TopologySpreadConstraints: []v1.TopologySpreadConstraint{constraint}, + // with a preference to the other pod + PodPreferences: []v1.WeightedPodAffinityTerm{{ + Weight: 50, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: affLabels, + }, + TopologyKey: v1.LabelHostname, + }, + }}}) + pods := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, append(affPods, affPod1)...) + // all pods should be scheduled since the affinity term is just a preference + for _, pod := range pods { + ExpectScheduled(ctx, env.Client, pod) + } + // and we'll get three nodes due to the topology spread + ExpectSkew(ctx, env.Client, "", &constraint).To(ConsistOf(1, 1, 1)) + }) + It("should support pod anti-affinity with a zone topology", func() { + Skip("enable after pod-affinity is finished") + affLabels := map[string]string{"security": "s2"} + + // affPods will avoid being scheduled in the same zone + affPods := MakePods(10, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: affLabels}, + PodAntiRequirements: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: affLabels, + }, + TopologyKey: v1.LabelTopologyZone, + }}}) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, affPods...) + + // we should get one pod per zone, and 7 failed to schedule pods + top := &v1.TopologySpreadConstraint{TopologyKey: v1.LabelTopologyZone} + ExpectSkew(ctx, env.Client, "default", top).To(ConsistOf(1, 1, 1)) + }) + It("should not schedule pods with affinity to a non-existent pod", func() { + Skip("enable after pod-affinity is finished") + affLabels := map[string]string{"security": "s2"} + + affPods := MakePods(10, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: affLabels}, + PodRequirements: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: affLabels, + }, + TopologyKey: v1.LabelTopologyZone, + }}}) + + pods := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, affPods...) 
+ // the pod we have affinity to is not on the cluster, so all of these pods are unschedulable + for _, p := range pods { + ExpectNotScheduled(ctx, env.Client, p) + } + }) + It("should support pod affinity with zone topology", func() { + Skip("enable after pod-affinity is finished") + affLabels := map[string]string{"security": "s2"} + + // the pod that the others have an affinity to + affPod1 := test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: affLabels}}) + + // affPods will all be scheduled in the same zone as affPod1 + affPods := MakePods(10, test.PodOptions{ + PodRequirements: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: affLabels, + }, + TopologyKey: v1.LabelTopologyZone, + }}}) + + affPods = append(affPods, affPod1) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, affPods...) + top := &v1.TopologySpreadConstraint{TopologyKey: v1.LabelTopologyZone} + ExpectSkew(ctx, env.Client, "default", top).To(ConsistOf(11)) + }) + It("should support pod affinity with hostname topology and hostname topology spread", func() { + Skip("enable after pod-affinity is finished") + affLabels := map[string]string{"security": "s2"} + + labelSelector := &metav1.LabelSelector{ + MatchLabels: affLabels, + } + // the pod that the others have an affinity to + affPod1 := test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: affLabels}}) + + // these pods want to run on different nodes due to topology spread, but also want to run on the same host + // as affPod1 + affPods := MakePods(10, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: v1.LabelHostname, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: labels}, + }, + }, + PodRequirements: []v1.PodAffinityTerm{{ + LabelSelector: labelSelector, + TopologyKey: v1.LabelHostname, + }}}) + + affPods = append(affPods, affPod1) + + pods := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, affPods...) 
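+			// the 10 labelled pods are constrained to spread across hostnames while also requiring co-location
+			// with affPod1 on a single host, so only one of them is expected to schedule next to it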
+ nodeNames := map[string]int{} + for _, p := range pods { + if p.Spec.NodeName == "" { + ExpectNotScheduled(ctx, env.Client, p) + } else { + ExpectScheduled(ctx, env.Client, p) + nodeNames[p.Spec.NodeName]++ + } + } + + // should have one node + Expect(nodeNames).To(HaveLen(1)) + // with two pods + for _, podCount := range nodeNames { + Expect(podCount).To(Equal(2)) + } + }) + It("should handle multiple chained affinities", func() { + Skip("enable after pod-affinity is finished") + dbLabels := map[string]string{"type": "db", "spread": "spread"} + webLabels := map[string]string{"type": "web", "spread": "spread"} + cacheLabels := map[string]string{"type": "cache", "spread": "spread"} + + // we have to schedule DB -> Web -> Cache in that order or else there are pod affinity violations + pods := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, + test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: dbLabels}}), + test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: cacheLabels}, + PodRequirements: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"type": "web"}}, + TopologyKey: v1.LabelHostname}, + }}), + test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: webLabels}, + PodRequirements: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"type": "db"}}, + TopologyKey: v1.LabelHostname}, + }}), + ) + for _, pod := range pods { + ExpectScheduled(ctx, env.Client, pod) + } + }) + It("should filter pod affinity topologies by namespace, no matching pods", func() { + Skip("enable after pod-affinity is finished") + topology := []v1.TopologySpreadConstraint{{ + TopologyKey: v1.LabelHostname, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: labels}, + MaxSkew: 1, + }} + + ExpectCreated(ctx, env.Client, &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "other-ns-no-match"}}) + affLabels := map[string]string{"security": "s2"} + + affPod1 := test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: affLabels, Namespace: "other-ns-no-match"}}) + // affPod2 will try to get scheduled with affPod1 + affPod2 := test.UnschedulablePod(test.PodOptions{PodRequirements: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: affLabels, + }, + TopologyKey: v1.LabelHostname, + }}}) + + var pods []*v1.Pod + // creates 10 nodes due to topo spread + pods = append(pods, MakePods(10, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels}, + TopologySpreadConstraints: topology, + })...) + pods = append(pods, affPod1) + pods = append(pods, affPod2) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pods...) 
+ + // the target pod gets scheduled + ExpectScheduled(ctx, env.Client, affPod1) + // but the one with affinity does not since the target pod is not in the same namespace and doesn't + // match the namespace list or namespace selector + ExpectNotScheduled(ctx, env.Client, affPod2) + }) + It("should filter pod affinity topologies by namespace, matching pods namespace list", func() { + Skip("enable after pod-affinity is finished") + topology := []v1.TopologySpreadConstraint{{ + TopologyKey: v1.LabelHostname, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: labels}, + MaxSkew: 1, + }} + + ExpectCreated(ctx, env.Client, &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "other-ns-list"}}) + affLabels := map[string]string{"security": "s2"} + + affPod1 := test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: affLabels, Namespace: "other-ns-list"}}) + // affPod2 will try to get scheduled with affPod1 + affPod2 := test.UnschedulablePod(test.PodOptions{PodRequirements: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: affLabels, + }, + Namespaces: []string{"other-ns-list"}, + TopologyKey: v1.LabelHostname, + }}}) + + var pods []*v1.Pod + // create 10 nodes + pods = append(pods, MakePods(10, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels}, + TopologySpreadConstraints: topology, + })...) + // put our target pod on one of them + pods = append(pods, affPod1) + // and our pod with affinity should schedule on the same node + pods = append(pods, affPod2) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pods...) + n1 := ExpectScheduled(ctx, env.Client, affPod1) + n2 := ExpectScheduled(ctx, env.Client, affPod2) + // should be scheduled on the same node + Expect(n1.Name).To(Equal(n2.Name)) + }) + It("should filter pod affinity topologies by namespace, matching pods namespace selector", func() { + Skip("enable after pod-affinity is finished") + topology := []v1.TopologySpreadConstraint{{ + TopologyKey: v1.LabelHostname, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: labels}, + MaxSkew: 1, + }} + + ExpectCreated(ctx, env.Client, &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "other-ns-selector", Labels: map[string]string{"foo": "bar"}}}) + affLabels := map[string]string{"security": "s2"} + + affPod1 := test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: affLabels, Namespace: "other-ns-selector"}}) + // affPod2 will try to get scheduled with affPod1 + affPod2 := test.UnschedulablePod(test.PodOptions{PodRequirements: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: affLabels, + }, + // select all pods, in all namespaces that match this selector + NamespaceSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + TopologyKey: v1.LabelHostname, + }}}) + + var pods []*v1.Pod + // create 10 nodes + pods = append(pods, MakePods(10, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels}, + TopologySpreadConstraints: topology, + })...) + // put our target pod on one of them + pods = append(pods, affPod1) + // and our pod with affinity should schedule on the same node + pods = append(pods, affPod2) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pods...) 
+ n1 := ExpectScheduled(ctx, env.Client, affPod1) + n2 := ExpectScheduled(ctx, env.Client, affPod2) + // should be scheduled on the same node due to the namespace selector + Expect(n1.Name).To(Equal(n2.Name)) + }) + }) }) var _ = Describe("Taints", func() { @@ -761,6 +1394,63 @@ var _ = Describe("Taints", func() { }) }) +var _ = Describe("Incompatible pod networking", func() { + Context("container HostPort usage", func() { + It("shouldn't co-locate pods that use the same HostPort and protocol", func() { + Skip("enable after scheduler is aware of hostport usage") + port := v1.ContainerPort{ + Name: "test-port", + HostPort: 80, + ContainerPort: 1234, + Protocol: "TCP", + } + pod1 := test.UnschedulablePod() + pod1.Spec.Containers[0].Ports = append(pod1.Spec.Containers[0].Ports, port) + pod2 := test.UnschedulablePod() + pod2.Spec.Containers[0].Ports = append(pod2.Spec.Containers[0].Ports, port) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod1, pod2) + node1 := ExpectScheduled(ctx, env.Client, pod1) + node2 := ExpectScheduled(ctx, env.Client, pod2) + Expect(node1.Name).ToNot(Equal(node2.Name)) + }) + It("should co-locate pods that use the same HostPort but a different protocol", func() { + port := v1.ContainerPort{ + Name: "test-port", + HostPort: 80, + ContainerPort: 1234, + Protocol: "TCP", + } + pod1 := test.UnschedulablePod() + pod1.Spec.Containers[0].Ports = append(pod1.Spec.Containers[0].Ports, port) + pod2 := test.UnschedulablePod() + port.Protocol = "UDP" + pod2.Spec.Containers[0].Ports = append(pod2.Spec.Containers[0].Ports, port) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod1, pod2) + node1 := ExpectScheduled(ctx, env.Client, pod1) + node2 := ExpectScheduled(ctx, env.Client, pod2) + Expect(node1.Name).To(Equal(node2.Name)) + }) + It("should co-locate pods that don't use HostPort", func() { + port := v1.ContainerPort{ + Name: "test-port", + ContainerPort: 1234, + Protocol: "TCP", + } + pod1 := test.UnschedulablePod() + pod1.Spec.Containers[0].Ports = append(pod1.Spec.Containers[0].Ports, port) + pod2 := test.UnschedulablePod() + pod2.Spec.Containers[0].Ports = append(pod2.Spec.Containers[0].Ports, port) + + ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod1, pod2) + node1 := ExpectScheduled(ctx, env.Client, pod1) + node2 := ExpectScheduled(ctx, env.Client, pod2) + Expect(node1.Name).To(Equal(node2.Name)) + }) + }) +}) + func MakePods(count int, options test.PodOptions) (pods []*v1.Pod) { for i := 0; i < count; i++ { pods = append(pods, test.UnschedulablePod(options))