From 08c4218f338a3f09a39083d6f7ed56070871dcb4 Mon Sep 17 00:00:00 2001 From: cebernardi Date: Mon, 21 Mar 2022 17:27:55 +0000 Subject: [PATCH] allow pod scheduling if they express soft pod affinity / anti affinity (#1541) * allow pod scheduling if they express soft pod affinity / anti affinity * changed error message to make it more clear Co-authored-by: Ellis Tarn * improved error message to make it clearer * added pod affinity and anti-affinity preferences relaxation Co-authored-by: Ellis Tarn --- pkg/controllers/selection/controller.go | 8 +-- pkg/controllers/selection/preferences.go | 32 ++++++++++ pkg/controllers/selection/suite_test.go | 80 ++++++++++++++++++++++-- pkg/utils/pod/scheduling.go | 16 ++--- 4 files changed, 119 insertions(+), 17 deletions(-) diff --git a/pkg/controllers/selection/controller.go b/pkg/controllers/selection/controller.go index eb4a918c5e01..e55926c3d934 100644 --- a/pkg/controllers/selection/controller.go +++ b/pkg/controllers/selection/controller.go @@ -142,11 +142,11 @@ func validateAffinity(p *v1.Pod) (errs error) { if p.Spec.Affinity == nil { return nil } - if pod.HasPodAffinity(p) { - errs = multierr.Append(errs, fmt.Errorf("pod affinity is not supported")) + if pod.HasRequiredPodAffinity(p) { + errs = multierr.Append(errs, fmt.Errorf("pod affinity rule 'requiredDuringSchedulingIgnoreDuringExecution' is not supported")) } - if pod.HasPodAntiAffinity(p) { - errs = multierr.Append(errs, fmt.Errorf("pod anti-affinity is not supported")) + if pod.HasRequiredPodAntiAffinity(p) { + errs = multierr.Append(errs, fmt.Errorf("pod anti-affinity rule 'requiredDuringSchedulingIgnoreDuringExecution' is not supported")) } if p.Spec.Affinity.NodeAffinity != nil { for _, term := range p.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution { diff --git a/pkg/controllers/selection/preferences.go b/pkg/controllers/selection/preferences.go index a3a15a69f2fa..4971a0258580 100644 --- a/pkg/controllers/selection/preferences.go +++ b/pkg/controllers/selection/preferences.go @@ -71,6 +71,8 @@ func (p *Preferences) Relax(ctx context.Context, pod *v1.Pod) { func (p *Preferences) relax(ctx context.Context, pod *v1.Pod) bool { for _, relaxFunc := range []func(*v1.Pod) *string{ + func(pod *v1.Pod) *string { return p.removePreferredPodAffinityTerm(pod) }, + func(pod *v1.Pod) *string { return p.removePreferredPodAntiAffinityTerm(pod) }, func(pod *v1.Pod) *string { return p.removePreferredNodeAffinityTerm(pod) }, func(pod *v1.Pod) *string { return p.removeRequiredNodeAffinityTerm(pod) }, func(pod *v1.Pod) *string { return p.toleratePreferNoScheduleTaints(pod) }, @@ -98,6 +100,36 @@ func (p *Preferences) removePreferredNodeAffinityTerm(pod *v1.Pod) *string { return nil } +func (p *Preferences) removePreferredPodAffinityTerm(pod *v1.Pod) *string { + if pod.Spec.Affinity == nil || pod.Spec.Affinity.PodAffinity == nil || len(pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution) == 0 { + return nil + } + terms := pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution + // Remove the all the terms + if len(terms) > 0 { + // Sort descending by weight to remove heaviest preferences to try lighter ones + sort.SliceStable(terms, func(i, j int) bool { return terms[i].Weight > terms[j].Weight }) + pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution = terms[1:] + return ptr.String(fmt.Sprintf("removing: spec.affinity.podAffinity.preferredDuringSchedulingIgnoredDuringExecution[0]=%s", pretty.Concise(terms[0]))) + } + return nil +} + +func (p *Preferences) removePreferredPodAntiAffinityTerm(pod *v1.Pod) *string { + if pod.Spec.Affinity == nil || pod.Spec.Affinity.PodAntiAffinity == nil || len(pod.Spec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution) == 0 { + return nil + } + terms := pod.Spec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution + // Remove the all the terms + if len(terms) > 0 { + // Sort descending by weight to remove heaviest preferences to try lighter ones + sort.SliceStable(terms, func(i, j int) bool { return terms[i].Weight > terms[j].Weight }) + pod.Spec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution = terms[1:] + return ptr.String(fmt.Sprintf("removing: spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0]=%s", pretty.Concise(terms[0]))) + } + return nil +} + func (p *Preferences) removeRequiredNodeAffinityTerm(pod *v1.Pod) *string { if pod.Spec.Affinity == nil || pod.Spec.Affinity.NodeAffinity == nil || diff --git a/pkg/controllers/selection/suite_test.go b/pkg/controllers/selection/suite_test.go index 0810ef91e05b..f7bd7c4f1918 100644 --- a/pkg/controllers/selection/suite_test.go +++ b/pkg/controllers/selection/suite_test.go @@ -179,7 +179,7 @@ var _ = Describe("Preferential Fallback", func() { }) }) Context("Preferred", func() { - It("should relax all terms", func() { + It("should relax all node affinity terms", func() { pod := test.UnschedulablePod() pod.Spec.Affinity = &v1.Affinity{NodeAffinity: &v1.NodeAffinity{PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{ { @@ -203,6 +203,76 @@ var _ = Describe("Preferential Fallback", func() { pod = ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod)[0] ExpectScheduled(ctx, env.Client, pod) }) + It("should relax all pod affinity terms", func() { + pod := test.UnschedulablePod() + pod.Spec.Affinity = &v1.Affinity{ + PodAffinity: &v1.PodAffinity{PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 1, PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + TopologyKey: v1.LabelTopologyZone, + }}, + { + Weight: 1, PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + TopologyKey: v1.LabelHostname, + }}, + }}, + NodeAffinity: &v1.NodeAffinity{PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{ + { + Weight: 1, Preference: v1.NodeSelectorTerm{MatchExpressions: []v1.NodeSelectorRequirement{ + {Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"invalid"}}, + }}, + }}}, + } + // Remove first pod affinity term + pod = ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod)[0] + ExpectNotScheduled(ctx, env.Client, pod) + // Remove second pod affinity term + pod = ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod)[0] + ExpectNotScheduled(ctx, env.Client, pod) + // Remove node affinity term + pod = ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod)[0] + ExpectNotScheduled(ctx, env.Client, pod) + // Success + pod = ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod)[0] + ExpectScheduled(ctx, env.Client, pod) + }) + It("should relax all pod anti-affinity terms", func() { + pod := test.UnschedulablePod() + pod.Spec.Affinity = &v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 1, PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + TopologyKey: v1.LabelTopologyZone, + }}, + { + Weight: 1, PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + TopologyKey: v1.LabelHostname, + }}, + }}, + NodeAffinity: &v1.NodeAffinity{PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{ + { + Weight: 1, Preference: v1.NodeSelectorTerm{MatchExpressions: []v1.NodeSelectorRequirement{ + {Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"invalid"}}, + }}, + }}}, + } + // Remove first pod anti-affinity term + pod = ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod)[0] + ExpectNotScheduled(ctx, env.Client, pod) + // Remove second pod anti-affinity term + pod = ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod)[0] + ExpectNotScheduled(ctx, env.Client, pod) + // Remove node affinity term + pod = ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod)[0] + ExpectNotScheduled(ctx, env.Client, pod) + // Success + pod = ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, pod)[0] + ExpectScheduled(ctx, env.Client, pod) + }) It("should relax to use lighter weights", func() { provisioner.Spec.Requirements = v1alpha5.NewRequirements( v1.NodeSelectorRequirement{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"test-zone-1", "test-zone-2"}}) @@ -321,19 +391,19 @@ var _ = Describe("Pod Affinity and AntiAffinity", func() { }))[0] ExpectNotScheduled(ctx, env.Client, pod) }) - It("should not schedule a pod with pod affinity preference", func() { + It("should schedule a pod with pod affinity preference", func() { ExpectCreated(ctx, env.Client) pod := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, test.UnschedulablePod(test.PodOptions{ PodPreferences: []v1.WeightedPodAffinityTerm{{Weight: 1, PodAffinityTerm: v1.PodAffinityTerm{TopologyKey: "foo"}}}, }))[0] - ExpectNotScheduled(ctx, env.Client, pod) + ExpectScheduled(ctx, env.Client, pod) }) - It("should not schedule a pod with pod anti-affinity preference", func() { + It("should schedule a pod with pod anti-affinity preference", func() { ExpectCreated(ctx, env.Client) pod := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, test.UnschedulablePod(test.PodOptions{ PodAntiPreferences: []v1.WeightedPodAffinityTerm{{Weight: 1, PodAffinityTerm: v1.PodAffinityTerm{TopologyKey: "foo"}}}, }))[0] - ExpectNotScheduled(ctx, env.Client, pod) + ExpectScheduled(ctx, env.Client, pod) }) It("should schedule a pod with empty pod affinity and anti-affinity", func() { ExpectCreated(ctx, env.Client) diff --git a/pkg/utils/pod/scheduling.go b/pkg/utils/pod/scheduling.go index 6c93ba37fe50..885369a5b5ad 100644 --- a/pkg/utils/pod/scheduling.go +++ b/pkg/utils/pod/scheduling.go @@ -68,16 +68,16 @@ func IsOwnedBy(pod *v1.Pod, gvks []schema.GroupVersionKind) bool { return false } -// HasPodAffinity returns true if a non-empty PodAffinity is defined in the pod spec -func HasPodAffinity(pod *v1.Pod) bool { +// HasRequiredPodAffinity returns true if a non-empty PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution +// is defined in the pod spec +func HasRequiredPodAffinity(pod *v1.Pod) bool { return pod.Spec.Affinity.PodAffinity != nil && - (len(pod.Spec.Affinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 || - len(pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution) != 0) + (len(pod.Spec.Affinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0) } -// HasPodAntiAffinity returns true if a non-empty PodAntiAffinity is defined in the pod spec -func HasPodAntiAffinity(pod *v1.Pod) bool { +// HasRequiredPodAntiAffinity returns true if a non-empty PodAntiAffinity/RequiredDuringSchedulingIgnoredDuringExecution +// is defined in the pod spec +func HasRequiredPodAntiAffinity(pod *v1.Pod) bool { return pod.Spec.Affinity.PodAntiAffinity != nil && - (len(pod.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 || - len(pod.Spec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution) != 0) + (len(pod.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0) }