From 038a5631d7a6c8448b23b6f8140ddfb993ddd5a2 Mon Sep 17 00:00:00 2001
From: Michał Woźniak
Date: Tue, 29 Oct 2024 09:04:55 +0100
Subject: [PATCH] TAS: validation for CQs using TAS (#3320)

* TAS: soft validation for ClusterQueues

* Review remarks
---
 apis/kueue/v1beta1/clusterqueue_types.go      |   1 +
 .../provisioningrequestconfig_types.go        |   6 +
 pkg/cache/clusterqueue.go                     |  52 ++++++-
 .../provisioning/admissioncheck_reconciler.go |   2 +-
 .../admissioncheck_reconciler_test.go         |  10 +-
 .../admissionchecks/provisioning/constants.go |   1 -
 .../provisioning/controller.go                |  10 +-
 .../provisioning/controller_test.go           |   4 +-
 .../admissionchecks/provisioning/indexer.go   |   2 +-
 .../provisioning/indexer_test.go              |  14 +-
 test/e2e/singlecluster/tas_test.go            |   5 +-
 .../provisioning/provisioning_test.go         |   6 +-
 test/integration/tas/tas_job_test.go          |   1 +
 test/integration/tas/tas_test.go              | 127 ++++++++++++++++++
 14 files changed, 210 insertions(+), 31 deletions(-)

diff --git a/apis/kueue/v1beta1/clusterqueue_types.go b/apis/kueue/v1beta1/clusterqueue_types.go
index 55c18bef2a..38d1f4582a 100644
--- a/apis/kueue/v1beta1/clusterqueue_types.go
+++ b/apis/kueue/v1beta1/clusterqueue_types.go
@@ -33,6 +33,7 @@ const (
 	ClusterQueueActiveReasonFlavorIndependentAdmissionCheckAppliedPerFlavor = "FlavorIndependentAdmissionCheckAppliedPerFlavor"
 	ClusterQueueActiveReasonMultipleMultiKueueAdmissionChecks = "MultipleMultiKueueAdmissionChecks"
 	ClusterQueueActiveReasonMultiKueueAdmissionCheckAppliedPerFlavor = "MultiKueueAdmissionCheckAppliedPerFlavor"
+	ClusterQueueActiveReasonNotSupportedWithTopologyAwareScheduling = "NotSupportedWithTopologyAwareScheduling"
 	ClusterQueueActiveReasonUnknown = "Unknown"
 	ClusterQueueActiveReasonReady = "Ready"
 )
diff --git a/apis/kueue/v1beta1/provisioningrequestconfig_types.go b/apis/kueue/v1beta1/provisioningrequestconfig_types.go
index 42de9d2b89..36f6234b56 100644
--- a/apis/kueue/v1beta1/provisioningrequestconfig_types.go
+++ b/apis/kueue/v1beta1/provisioningrequestconfig_types.go
@@ -21,6 +21,12 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
+const (
+	// ProvisioningRequestControllerName is the name used by the Provisioning
+	// Request admission check controller.
+	ProvisioningRequestControllerName = "kueue.x-k8s.io/provisioning-request"
+)
+
 // ProvisioningRequestConfigSpec defines the desired state of ProvisioningRequestConfig
 type ProvisioningRequestConfigSpec struct {
 	// ProvisioningClassName describes the different modes of provisioning the resources.
diff --git a/pkg/cache/clusterqueue.go b/pkg/cache/clusterqueue.go
index 5b652b54e0..385e254a4d 100644
--- a/pkg/cache/clusterqueue.go
+++ b/pkg/cache/clusterqueue.go
@@ -79,7 +79,9 @@ type clusterQueue struct {
 	multipleSingleInstanceControllersChecks map[string][]string // key = controllerName
 	flavorIndependentAdmissionCheckAppliedPerFlavor []string
 	multiKueueAdmissionChecks []string
+	provisioningAdmissionChecks []string
 	perFlavorMultiKueueAdmissionChecks []string
+	tasFlavors []kueue.ResourceFlavorReference
 	admittedWorkloadsCount int
 	isStopped bool
 	workloadInfoOptions []workload.InfoOption
@@ -236,6 +238,7 @@ func (c *clusterQueue) updateQueueStatus() {
 		len(c.inactiveAdmissionChecks) > 0 ||
 		len(c.multipleSingleInstanceControllersChecks) > 0 ||
 		len(c.flavorIndependentAdmissionCheckAppliedPerFlavor) > 0 ||
+		c.isTASViolated() ||
 		// one multikueue admission check is allowed
 		len(c.multiKueueAdmissionChecks) > 1 ||
 		len(c.perFlavorMultiKueueAdmissionChecks) > 0 {
@@ -297,6 +300,25 @@ func (c *clusterQueue) inactiveReason() (string, string) {
 			messages = append(messages, fmt.Sprintf("AdmissionCheck(s): %v cannot be set at flavor level", c.flavorIndependentAdmissionCheckAppliedPerFlavor))
 		}
 
+		if features.Enabled(features.TopologyAwareScheduling) && len(c.tasFlavors) > 0 {
+			if c.HasParent() {
+				reasons = append(reasons, kueue.ClusterQueueActiveReasonNotSupportedWithTopologyAwareScheduling)
+				messages = append(messages, "TAS is not supported for cohorts")
+			}
+			if c.Preemption.WithinClusterQueue != kueue.PreemptionPolicyNever {
+				reasons = append(reasons, kueue.ClusterQueueActiveReasonNotSupportedWithTopologyAwareScheduling)
+				messages = append(messages, "TAS is not supported for preemption within cluster queue")
+			}
+			if len(c.multiKueueAdmissionChecks) > 0 {
+				reasons = append(reasons, kueue.ClusterQueueActiveReasonNotSupportedWithTopologyAwareScheduling)
+				messages = append(messages, "TAS is not supported with MultiKueue admission check")
+			}
+			if len(c.provisioningAdmissionChecks) > 0 {
+				reasons = append(reasons, kueue.ClusterQueueActiveReasonNotSupportedWithTopologyAwareScheduling)
+				messages = append(messages, "TAS is not supported with ProvisioningRequest admission check")
+			}
+		}
+
 		if len(reasons) == 0 {
 			return kueue.ClusterQueueActiveReasonUnknown, "Can't admit new workloads."
 		}
@@ -306,6 +328,16 @@
 	return kueue.ClusterQueueActiveReasonReady, "Can admit new workloads"
 }
 
+func (c *clusterQueue) isTASViolated() bool {
+	if !features.Enabled(features.TopologyAwareScheduling) || len(c.tasFlavors) == 0 {
+		return false
+	}
+	return c.HasParent() ||
+		c.Preemption.WithinClusterQueue != kueue.PreemptionPolicyNever ||
+		len(c.multiKueueAdmissionChecks) > 0 ||
+		len(c.provisioningAdmissionChecks) > 0
+}
+
 // UpdateWithFlavors updates a ClusterQueue based on the passed ResourceFlavors set.
 // Exported only for testing.
 func (c *clusterQueue) UpdateWithFlavors(flavors map[kueue.ResourceFlavorReference]*kueue.ResourceFlavor) {
@@ -315,6 +347,7 @@ func (c *clusterQueue) UpdateWithFlavors(flavors map[kueue.ResourceFlavorReferen
 
 func (c *clusterQueue) updateLabelKeys(flavors map[kueue.ResourceFlavorReference]*kueue.ResourceFlavor) {
 	c.missingFlavors = nil
+	c.tasFlavors = nil
 	for i := range c.ResourceGroups {
 		rg := &c.ResourceGroups[i]
 		if len(rg.Flavors) == 0 {
@@ -327,6 +360,9 @@ func (c *clusterQueue) updateLabelKeys(flavors map[kueue.ResourceFlavorReference
 			for k := range flv.Spec.NodeLabels {
 				keys.Insert(k)
 			}
+			if flv.Spec.TopologyName != nil {
+				c.tasFlavors = append(c.tasFlavors, fName)
+			}
 		} else {
 			c.missingFlavors = append(c.missingFlavors, fName)
 		}
@@ -343,6 +379,7 @@ func (c *clusterQueue) updateWithAdmissionChecks(checks map[string]AdmissionChec
 	checksPerController := make(map[string][]string, len(c.AdmissionChecks))
 	singleInstanceControllers := sets.New[string]()
 	multiKueueAdmissionChecks := sets.New[string]()
+	provisioningAdmissionChecks := sets.New[string]()
 	var missing []string
 	var inactive []string
 	var flavorIndependentCheckOnFlavors []string
@@ -362,6 +399,9 @@ func (c *clusterQueue) updateWithAdmissionChecks(checks map[string]AdmissionChec
 			flavorIndependentCheckOnFlavors = append(flavorIndependentCheckOnFlavors, acName)
 		}
 
+		if ac.Controller == kueue.ProvisioningRequestControllerName {
+			provisioningAdmissionChecks.Insert(acName)
+		}
 		if ac.Controller == kueue.MultiKueueControllerName {
 			// MultiKueue Admission Checks has extra constraints:
 			// - cannot use multiple MultiKueue AdmissionChecks on the same ClusterQueue
@@ -379,7 +419,8 @@ func (c *clusterQueue) updateWithAdmissionChecks(checks map[string]AdmissionChec
 	slices.Sort(inactive)
 	slices.Sort(flavorIndependentCheckOnFlavors)
 	slices.Sort(perFlavorMultiKueueChecks)
-	multiKueueAdmissionChecksList := sets.List(multiKueueAdmissionChecks)
+	multiKueueChecks := sets.List(multiKueueAdmissionChecks)
+	provisioningChecks := sets.List(provisioningAdmissionChecks)
 
 	update := false
 	if !slices.Equal(c.missingAdmissionChecks, missing) {
@@ -414,8 +455,13 @@ func (c *clusterQueue) updateWithAdmissionChecks(checks map[string]AdmissionChec
 		}
 	}
 
-	if !slices.Equal(c.multiKueueAdmissionChecks, multiKueueAdmissionChecksList) {
-		c.multiKueueAdmissionChecks = multiKueueAdmissionChecksList
+	if !slices.Equal(c.multiKueueAdmissionChecks, multiKueueChecks) {
+		c.multiKueueAdmissionChecks = multiKueueChecks
+		update = true
+	}
+
+	if !slices.Equal(c.provisioningAdmissionChecks, provisioningChecks) {
+		c.provisioningAdmissionChecks = provisioningChecks
 		update = true
 	}
 
diff --git a/pkg/controller/admissionchecks/provisioning/admissioncheck_reconciler.go b/pkg/controller/admissionchecks/provisioning/admissioncheck_reconciler.go
index 2819009d3b..87be4ed8d5 100644
--- a/pkg/controller/admissionchecks/provisioning/admissioncheck_reconciler.go
+++ b/pkg/controller/admissionchecks/provisioning/admissioncheck_reconciler.go
@@ -37,7 +37,7 @@ var _ reconcile.Reconciler = (*acReconciler)(nil)
 
 func (a *acReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
 	ac := &kueue.AdmissionCheck{}
-	if err := a.client.Get(ctx, req.NamespacedName, ac); err != nil || ac.Spec.ControllerName != ControllerName {
+	if err := a.client.Get(ctx, req.NamespacedName, ac); err != nil || ac.Spec.ControllerName != kueue.ProvisioningRequestControllerName {
 		return reconcile.Result{}, client.IgnoreNotFound(err)
 	}
 
diff --git a/pkg/controller/admissionchecks/provisioning/admissioncheck_reconciler_test.go b/pkg/controller/admissionchecks/provisioning/admissioncheck_reconciler_test.go
index e27b1685e8..68d94cb838 100644
--- a/pkg/controller/admissionchecks/provisioning/admissioncheck_reconciler_test.go
+++ b/pkg/controller/admissionchecks/provisioning/admissioncheck_reconciler_test.go
@@ -42,7 +42,7 @@ func TestReconcileAdmissionCheck(t *testing.T) {
 		},
 		"no parameters specified": {
 			check: utiltesting.MakeAdmissionCheck("check1").
-				ControllerName(ControllerName).
+				ControllerName(kueue.ProvisioningRequestControllerName).
 				Generation(1).
 				Obj(),
 			wantCondition: &metav1.Condition{
@@ -56,7 +56,7 @@ func TestReconcileAdmissionCheck(t *testing.T) {
 		"bad ref group": {
 			check: utiltesting.MakeAdmissionCheck("check1").
 				Parameters("bad.group", ConfigKind, "config1").
-				ControllerName(ControllerName).
+				ControllerName(kueue.ProvisioningRequestControllerName).
 				Generation(1).
 				Obj(),
 			wantCondition: &metav1.Condition{
@@ -70,7 +70,7 @@ func TestReconcileAdmissionCheck(t *testing.T) {
 		"bad ref kind": {
 			check: utiltesting.MakeAdmissionCheck("check1").
 				Parameters(kueue.GroupVersion.Group, "BadKind", "config1").
-				ControllerName(ControllerName).
+				ControllerName(kueue.ProvisioningRequestControllerName).
 				Generation(1).
 				Obj(),
 			wantCondition: &metav1.Condition{
@@ -84,7 +84,7 @@ func TestReconcileAdmissionCheck(t *testing.T) {
 		"config missing": {
 			check: utiltesting.MakeAdmissionCheck("check1").
 				Parameters(kueue.GroupVersion.Group, ConfigKind, "config1").
-				ControllerName(ControllerName).
+				ControllerName(kueue.ProvisioningRequestControllerName).
 				Generation(1).
 				Obj(),
 			wantCondition: &metav1.Condition{
@@ -98,7 +98,7 @@ func TestReconcileAdmissionCheck(t *testing.T) {
 		"config found": {
 			check: utiltesting.MakeAdmissionCheck("check1").
 				Parameters(kueue.GroupVersion.Group, ConfigKind, "config1").
-				ControllerName(ControllerName).
+				ControllerName(kueue.ProvisioningRequestControllerName).
 				Generation(1).
 				Obj(),
 			configs: []kueue.ProvisioningRequestConfig{
diff --git a/pkg/controller/admissionchecks/provisioning/constants.go b/pkg/controller/admissionchecks/provisioning/constants.go
index 54df355848..12787cba32 100644
--- a/pkg/controller/admissionchecks/provisioning/constants.go
+++ b/pkg/controller/admissionchecks/provisioning/constants.go
@@ -18,7 +18,6 @@ package provisioning
 
 const (
 	ConfigKind = "ProvisioningRequestConfig"
-	ControllerName = "kueue.x-k8s.io/provisioning-request"
 	DeprecatedConsumesAnnotationKey = "cluster-autoscaler.kubernetes.io/consume-provisioning-request"
 	DeprecatedClassNameAnnotationKey = "cluster-autoscaler.kubernetes.io/provisioning-class-name"
 	ConsumesAnnotationKey = "autoscaling.x-k8s.io/consume-provisioning-request"
diff --git a/pkg/controller/admissionchecks/provisioning/controller.go b/pkg/controller/admissionchecks/provisioning/controller.go
index 2abf529ad4..3ea8d432a5 100644
--- a/pkg/controller/admissionchecks/provisioning/controller.go
+++ b/pkg/controller/admissionchecks/provisioning/controller.go
@@ -150,7 +150,7 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
 	}
 
 	// get the lists of relevant checks
-	relevantChecks, err := admissioncheck.FilterForController(ctx, c.client, wl.Status.AdmissionChecks, ControllerName)
+	relevantChecks, err := admissioncheck.FilterForController(ctx, c.client, wl.Status.AdmissionChecks, kueue.ProvisioningRequestControllerName)
 	if err != nil {
 		return reconcile.Result{}, err
 	}
@@ -586,7 +586,7 @@ func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, ch
 		workload.SetAdmissionCheckState(&wlPatch.Status.AdmissionChecks, checkState)
 	}
 	if updated {
-		if err := c.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(ControllerName), client.ForceOwnership); err != nil {
+		if err := c.client.Status().Patch(ctx, wlPatch, client.Apply, client.FieldOwner(kueue.ProvisioningRequestControllerName), client.ForceOwnership); err != nil {
 			return err
 		}
 		for i := range recorderMessages {
@@ -625,7 +625,7 @@ func (a *acHandler) Create(ctx context.Context, event event.CreateEvent, q workq
 		return
 	}
 
-	if ac.Spec.ControllerName == ControllerName {
+	if ac.Spec.ControllerName == kueue.ProvisioningRequestControllerName {
 		err := a.reconcileWorkloadsUsing(ctx, ac.Name, q)
 		if err != nil {
 			ctrl.LoggerFrom(ctx).V(5).Error(err, "Failure on create event", "admissionCheck", klog.KObj(ac))
@@ -640,7 +640,7 @@ func (a *acHandler) Update(ctx context.Context, event event.UpdateEvent, q workq
 		return
 	}
 
-	if oldAc.Spec.ControllerName == ControllerName || newAc.Spec.ControllerName == ControllerName {
+	if oldAc.Spec.ControllerName == kueue.ProvisioningRequestControllerName || newAc.Spec.ControllerName == kueue.ProvisioningRequestControllerName {
 		err := a.reconcileWorkloadsUsing(ctx, oldAc.Name, q)
 		if err != nil {
 			ctrl.LoggerFrom(ctx).V(5).Error(err, "Failure on update event", "admissionCheck", klog.KObj(oldAc))
@@ -654,7 +654,7 @@ func (a *acHandler) Delete(ctx context.Context, event event.DeleteEvent, q workq
 		return
 	}
 
-	if ac.Spec.ControllerName == ControllerName {
+	if ac.Spec.ControllerName == kueue.ProvisioningRequestControllerName {
 		err := a.reconcileWorkloadsUsing(ctx, ac.Name, q)
 		if err != nil {
 			ctrl.LoggerFrom(ctx).V(5).Error(err, "Failure on delete event", "admissionCheck", klog.KObj(ac))
diff --git a/pkg/controller/admissionchecks/provisioning/controller_test.go b/pkg/controller/admissionchecks/provisioning/controller_test.go
index 2e44260615..d4dcb5c0f5 100644
--- a/pkg/controller/admissionchecks/provisioning/controller_test.go
+++ b/pkg/controller/admissionchecks/provisioning/controller_test.go
@@ -263,7 +263,7 @@ func TestReconcile(t *testing.T) {
 	}
 
 	baseCheck := utiltesting.MakeAdmissionCheck("check1").
-		ControllerName(ControllerName).
+		ControllerName(kueue.ProvisioningRequestControllerName).
 		Parameters(kueue.GroupVersion.Group, ConfigKind, "config1").
 		Obj()
 
@@ -1370,7 +1370,7 @@ func TestActiveOrLastPRForChecks(t *testing.T) {
 	pr2Created.Name = "wl-check-2"
 
 	baseCheck := utiltesting.MakeAdmissionCheck("check").
-		ControllerName(ControllerName).
+		ControllerName(kueue.ProvisioningRequestControllerName).
 		Parameters(kueue.GroupVersion.Group, ConfigKind, "config1").
 		Obj()
 
diff --git a/pkg/controller/admissionchecks/provisioning/indexer.go b/pkg/controller/admissionchecks/provisioning/indexer.go
index 91f48e02ca..e62bbe905b 100644
--- a/pkg/controller/admissionchecks/provisioning/indexer.go
+++ b/pkg/controller/admissionchecks/provisioning/indexer.go
@@ -65,7 +65,7 @@ func SetupIndexer(ctx context.Context, indexer client.FieldIndexer) error {
 		return fmt.Errorf("setting index on workloads checks: %w", err)
 	}
 
-	if err := indexer.IndexField(ctx, &kueue.AdmissionCheck{}, AdmissionCheckUsingConfigKey, admissioncheck.IndexerByConfigFunction(ControllerName, configGVK)); err != nil {
+	if err := indexer.IndexField(ctx, &kueue.AdmissionCheck{}, AdmissionCheckUsingConfigKey, admissioncheck.IndexerByConfigFunction(kueue.ProvisioningRequestControllerName, configGVK)); err != nil {
 		return fmt.Errorf("setting index on admission checks config: %w", err)
 	}
 	return nil
diff --git a/pkg/controller/admissionchecks/provisioning/indexer_test.go b/pkg/controller/admissionchecks/provisioning/indexer_test.go
index 696810ef23..a5a6112d6f 100644
--- a/pkg/controller/admissionchecks/provisioning/indexer_test.go
+++ b/pkg/controller/admissionchecks/provisioning/indexer_test.go
@@ -288,7 +288,7 @@ func TestIndexAdmissionChecks(t *testing.T) {
 		"bad ref group": {
 			checks: []*kueue.AdmissionCheck{
 				utiltesting.MakeAdmissionCheck("check1").
-					ControllerName(ControllerName).
+					ControllerName(kueue.ProvisioningRequestControllerName).
 					Parameters("core", ConfigKind, "config1").
 					Obj(),
 			},
@@ -297,7 +297,7 @@ func TestIndexAdmissionChecks(t *testing.T) {
 		"bad ref kind": {
 			checks: []*kueue.AdmissionCheck{
 				utiltesting.MakeAdmissionCheck("check1").
-					ControllerName(ControllerName).
+					ControllerName(kueue.ProvisioningRequestControllerName).
 					Parameters(kueue.GroupVersion.Group, "kind", "config1").
 					Obj(),
 			},
@@ -306,7 +306,7 @@ func TestIndexAdmissionChecks(t *testing.T) {
 		"empty name": {
 			checks: []*kueue.AdmissionCheck{
 				utiltesting.MakeAdmissionCheck("check1").
-					ControllerName(ControllerName).
+					ControllerName(kueue.ProvisioningRequestControllerName).
 					Parameters(kueue.GroupVersion.Group, ConfigKind, "").
 					Obj(),
 			},
@@ -315,7 +315,7 @@ func TestIndexAdmissionChecks(t *testing.T) {
 		"match": {
 			checks: []*kueue.AdmissionCheck{
 				utiltesting.MakeAdmissionCheck("check1").
-					ControllerName(ControllerName).
+					ControllerName(kueue.ProvisioningRequestControllerName).
 					Parameters(kueue.GroupVersion.Group, ConfigKind, "config1").
 					Obj(),
 			},
@@ -325,15 +325,15 @@ func TestIndexAdmissionChecks(t *testing.T) {
 		"multiple checks, partial match": {
 			checks: []*kueue.AdmissionCheck{
 				utiltesting.MakeAdmissionCheck("check1").
-					ControllerName(ControllerName).
+					ControllerName(kueue.ProvisioningRequestControllerName).
 					Parameters(kueue.GroupVersion.Group, ConfigKind, "config1").
 					Obj(),
 				utiltesting.MakeAdmissionCheck("check2").
-					ControllerName(ControllerName).
+					ControllerName(kueue.ProvisioningRequestControllerName).
 					Parameters(kueue.GroupVersion.Group, ConfigKind, "config1").
 					Obj(),
 				utiltesting.MakeAdmissionCheck("check3").
-					ControllerName(ControllerName).
+					ControllerName(kueue.ProvisioningRequestControllerName).
 					Parameters(kueue.GroupVersion.Group, ConfigKind, "config2").
 					Obj(),
 			},
diff --git a/test/e2e/singlecluster/tas_test.go b/test/e2e/singlecluster/tas_test.go
index 751fad05b7..0176262c79 100644
--- a/test/e2e/singlecluster/tas_test.go
+++ b/test/e2e/singlecluster/tas_test.go
@@ -77,11 +77,10 @@ var _ = ginkgo.Describe("TopologyAwareSchedling", func() {
 					Resource(corev1.ResourceMemory, "1Gi").
 					Obj(),
 				).
-				Preemption(kueue.ClusterQueuePreemption{
-					WithinClusterQueue: kueue.PreemptionPolicyLowerPriority,
-				}).
 				Obj()
 			gomega.Expect(k8sClient.Create(ctx, clusterQueue)).Should(gomega.Succeed())
+			util.ExpectClusterQueuesToBeActive(ctx, k8sClient, clusterQueue)
+
 			localQueue = testing.MakeLocalQueue("main", ns.Name).ClusterQueue("cluster-queue").Obj()
 			gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed())
 		})
diff --git a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go
index 61a5ce4ba0..68056ac8b2 100644
--- a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go
+++ b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go
@@ -121,7 +121,7 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
 			gomega.Expect(k8sClient.Create(ctx, prc2)).To(gomega.Succeed())
 
 			ac = testing.MakeAdmissionCheck("ac-prov").
-				ControllerName(provisioning.ControllerName).
+				ControllerName(kueue.ProvisioningRequestControllerName).
 				Parameters(kueue.GroupVersion.Group, "ProvisioningRequestConfig", prc.Name).
 				Obj()
 			gomega.Expect(k8sClient.Create(ctx, ac)).To(gomega.Succeed())
@@ -971,7 +971,7 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
 			gomega.Expect(k8sClient.Create(ctx, prc)).To(gomega.Succeed())
 
 			ac = testing.MakeAdmissionCheck("ac-prov").
-				ControllerName(provisioning.ControllerName).
+				ControllerName(kueue.ProvisioningRequestControllerName).
 				Parameters(kueue.GroupVersion.Group, "ProvisioningRequestConfig", prc.Name).
 				Obj()
 			gomega.Expect(k8sClient.Create(ctx, ac)).To(gomega.Succeed())
@@ -1214,7 +1214,7 @@ var _ = ginkgo.Describe("Provisioning", ginkgo.Ordered, ginkgo.ContinueOnFailure
 			gomega.Expect(k8sClient.Create(ctx, prc)).To(gomega.Succeed())
 
 			ac = testing.MakeAdmissionCheck("ac-prov").
-				ControllerName(provisioning.ControllerName).
+				ControllerName(kueue.ProvisioningRequestControllerName).
 				Parameters(kueue.GroupVersion.Group, "ProvisioningRequestConfig", prc.Name).
 				Obj()
 			gomega.Expect(k8sClient.Create(ctx, ac)).To(gomega.Succeed())
diff --git a/test/integration/tas/tas_job_test.go b/test/integration/tas/tas_job_test.go
index fcea64affd..ea388c5e51 100644
--- a/test/integration/tas/tas_job_test.go
+++ b/test/integration/tas/tas_job_test.go
@@ -166,6 +166,7 @@ var _ = ginkgo.Describe("Topology Aware Scheduling", ginkgo.Ordered, func() {
 				ResourceGroup(*testing.MakeFlavorQuotas(tasFlavor.Name).Resource(corev1.ResourceCPU, "5").Obj()).
 				Obj()
 			gomega.Expect(k8sClient.Create(ctx, clusterQueue)).Should(gomega.Succeed())
+			util.ExpectClusterQueuesToBeActive(ctx, k8sClient, clusterQueue)
 
 			localQueue = testing.MakeLocalQueue("local-queue", ns.Name).ClusterQueue(clusterQueue.Name).Obj()
 			gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed())
diff --git a/test/integration/tas/tas_test.go b/test/integration/tas/tas_test.go
index e84b415b8e..64b035742b 100644
--- a/test/integration/tas/tas_test.go
+++ b/test/integration/tas/tas_test.go
@@ -21,9 +21,13 @@ import (
 	"github.com/onsi/gomega"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 
+	kueuealpha "sigs.k8s.io/kueue/apis/kueue/v1alpha1"
+	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
 	"sigs.k8s.io/kueue/pkg/features"
 	"sigs.k8s.io/kueue/pkg/util/testing"
+	"sigs.k8s.io/kueue/test/util"
 )
 
 var _ = ginkgo.Describe("Topology Aware Scheduling", ginkgo.Ordered, func() {
@@ -66,4 +70,127 @@ var _ = ginkgo.Describe("Topology Aware Scheduling", ginkgo.Ordered, func() {
 			gomega.Expect(k8sClient.Create(ctx, tasFlavor)).Should(gomega.HaveOccurred())
 		})
 	})
+
+	ginkgo.When("Negative scenarios for ClusterQueue configuration", func() {
+		var (
+			topology       *kueuealpha.Topology
+			tasFlavor      *kueue.ResourceFlavor
+			clusterQueue   *kueue.ClusterQueue
+			admissionCheck *kueue.AdmissionCheck
+		)
+
+		ginkgo.BeforeEach(func() {
+			topology = testing.MakeTopology("default").Levels([]string{
+				tasBlockLabel,
+				tasRackLabel,
+			}).Obj()
+			gomega.Expect(k8sClient.Create(ctx, topology)).Should(gomega.Succeed())
+
+			tasFlavor = testing.MakeResourceFlavor("tas-flavor").
+				NodeLabel("node-group", "tas").
+				TopologyName("default").Obj()
+			gomega.Expect(k8sClient.Create(ctx, tasFlavor)).Should(gomega.Succeed())
+		})
+
+		ginkgo.AfterEach(func() {
+			util.ExpectObjectToBeDeleted(ctx, k8sClient, clusterQueue, true)
+			util.ExpectObjectToBeDeleted(ctx, k8sClient, tasFlavor, true)
+			util.ExpectObjectToBeDeleted(ctx, k8sClient, topology, true)
+			util.ExpectObjectToBeDeleted(ctx, k8sClient, admissionCheck, true)
+		})
+
+		ginkgo.It("should mark TAS ClusterQueue as inactive if used in cohort", func() {
+			clusterQueue = testing.MakeClusterQueue("cq").
+				ResourceGroup(
+					*testing.MakeFlavorQuotas(tasFlavor.Name).Resource(corev1.ResourceCPU, "5").Obj(),
+				).Cohort("cohort").Obj()
+			gomega.Expect(k8sClient.Create(ctx, clusterQueue)).Should(gomega.Succeed())
+
+			gomega.Eventually(func() []metav1.Condition {
+				var updatedCq kueue.ClusterQueue
+				gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(clusterQueue), &updatedCq)).To(gomega.Succeed())
+				return updatedCq.Status.Conditions
+			}, util.Timeout, util.Interval).Should(gomega.BeComparableTo([]metav1.Condition{
+				{
+					Type:    kueue.ClusterQueueActive,
+					Status:  metav1.ConditionFalse,
+					Reason:  "NotSupportedWithTopologyAwareScheduling",
+					Message: `Can't admit new workloads: TAS is not supported for cohorts.`,
+				},
+			}, util.IgnoreConditionTimestampsAndObservedGeneration))
+		})
+
+		ginkgo.It("should mark TAS ClusterQueue as inactive if used with preemption", func() {
+			clusterQueue = testing.MakeClusterQueue("cq").
+				ResourceGroup(
+					*testing.MakeFlavorQuotas(tasFlavor.Name).Resource(corev1.ResourceCPU, "5").Obj(),
+				).Preemption(kueue.ClusterQueuePreemption{
+				WithinClusterQueue: kueue.PreemptionPolicyLowerPriority,
+			}).Obj()
+			gomega.Expect(k8sClient.Create(ctx, clusterQueue)).Should(gomega.Succeed())
+
+			gomega.Eventually(func() []metav1.Condition {
+				var updatedCq kueue.ClusterQueue
+				gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(clusterQueue), &updatedCq)).To(gomega.Succeed())
+				return updatedCq.Status.Conditions
+			}, util.Timeout, util.Interval).Should(gomega.BeComparableTo([]metav1.Condition{
+				{
+					Type:    kueue.ClusterQueueActive,
+					Status:  metav1.ConditionFalse,
+					Reason:  "NotSupportedWithTopologyAwareScheduling",
+					Message: `Can't admit new workloads: TAS is not supported for preemption within cluster queue.`,
+				},
+			}, util.IgnoreConditionTimestampsAndObservedGeneration))
+		})
+
+		ginkgo.It("should mark TAS ClusterQueue as inactive if used with MultiKueue", func() {
+			admissionCheck = testing.MakeAdmissionCheck("multikueue").ControllerName(kueue.MultiKueueControllerName).Obj()
+			gomega.Expect(k8sClient.Create(ctx, admissionCheck)).To(gomega.Succeed())
+			util.SetAdmissionCheckActive(ctx, k8sClient, admissionCheck, metav1.ConditionTrue)
+
+			clusterQueue = testing.MakeClusterQueue("cq").
+				ResourceGroup(
+					*testing.MakeFlavorQuotas(tasFlavor.Name).Resource(corev1.ResourceCPU, "5").Obj(),
+				).AdmissionChecks(admissionCheck.Name).Obj()
+			gomega.Expect(k8sClient.Create(ctx, clusterQueue)).Should(gomega.Succeed())
+
+			gomega.Eventually(func() []metav1.Condition {
+				var updatedCq kueue.ClusterQueue
+				gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(clusterQueue), &updatedCq)).To(gomega.Succeed())
+				return updatedCq.Status.Conditions
+			}, util.Timeout, util.Interval).Should(gomega.BeComparableTo([]metav1.Condition{
+				{
+					Type:    kueue.ClusterQueueActive,
+					Status:  metav1.ConditionFalse,
+					Reason:  "NotSupportedWithTopologyAwareScheduling",
+					Message: `Can't admit new workloads: TAS is not supported with MultiKueue admission check.`,
+				},
+			}, util.IgnoreConditionTimestampsAndObservedGeneration))
+		})
+
+		ginkgo.It("should mark TAS ClusterQueue as inactive if used with ProvisioningRequest", func() {
+			admissionCheck = testing.MakeAdmissionCheck("provisioning").ControllerName(kueue.ProvisioningRequestControllerName).Obj()
+			gomega.Expect(k8sClient.Create(ctx, admissionCheck)).To(gomega.Succeed())
+			util.SetAdmissionCheckActive(ctx, k8sClient, admissionCheck, metav1.ConditionTrue)
+
+			clusterQueue = testing.MakeClusterQueue("cq").
+				ResourceGroup(
+					*testing.MakeFlavorQuotas(tasFlavor.Name).Resource(corev1.ResourceCPU, "5").Obj(),
+				).AdmissionChecks(admissionCheck.Name).Obj()
+			gomega.Expect(k8sClient.Create(ctx, clusterQueue)).Should(gomega.Succeed())
+
+			gomega.Eventually(func() []metav1.Condition {
+				var updatedCq kueue.ClusterQueue
+				gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(clusterQueue), &updatedCq)).To(gomega.Succeed())
+				return updatedCq.Status.Conditions
+			}, util.Timeout, util.Interval).Should(gomega.BeComparableTo([]metav1.Condition{
+				{
+					Type:    kueue.ClusterQueueActive,
+					Status:  metav1.ConditionFalse,
+					Reason:  "NotSupportedWithTopologyAwareScheduling",
+					Message: `Can't admit new workloads: TAS is not supported with ProvisioningRequest admission check.`,
+				},
+			}, util.IgnoreConditionTimestampsAndObservedGeneration))
+		})
+	})
 })
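Note on the validation introduced in pkg/cache/clusterqueue.go: a ClusterQueue that references at least one TAS ResourceFlavor (a flavor with spec.topologyName set) is held inactive while any of four incompatible settings is present: membership in a cohort, a withinClusterQueue preemption policy other than Never, a MultiKueue admission check, or a ProvisioningRequest admission check. The standalone Go sketch below models that decision table; it is illustrative only, cqConfig and tasViolations are invented stand-ins for the patched clusterQueue fields, and the TopologyAwareScheduling feature-gate check is folded into hasTASFlavor for brevity.

// Standalone sketch of the TAS compatibility rules; not part of the patch.
package main

import "fmt"

// cqConfig is a simplified stand-in for the cached clusterQueue state.
type cqConfig struct {
	hasTASFlavor       bool // any flavor with spec.topologyName set (gate assumed enabled)
	hasParentCohort    bool // c.HasParent() in the patch
	withinCQPreemption bool // withinClusterQueue policy is anything but Never
	multiKueueChecks   int  // admission checks run by the MultiKueue controller
	provisioningChecks int  // admission checks run by the ProvisioningRequest controller
}

// tasViolations mirrors the shape of inactiveReason(): every violated rule
// contributes its own message, and any non-empty result (isTASViolated in
// the patch) keeps the ClusterQueue inactive.
func tasViolations(c cqConfig) []string {
	if !c.hasTASFlavor {
		return nil // the rules only apply to queues using a TAS flavor
	}
	var msgs []string
	if c.hasParentCohort {
		msgs = append(msgs, "TAS is not supported for cohorts")
	}
	if c.withinCQPreemption {
		msgs = append(msgs, "TAS is not supported for preemption within cluster queue")
	}
	if c.multiKueueChecks > 0 {
		msgs = append(msgs, "TAS is not supported with MultiKueue admission check")
	}
	if c.provisioningChecks > 0 {
		msgs = append(msgs, "TAS is not supported with ProvisioningRequest admission check")
	}
	return msgs
}

func main() {
	cq := cqConfig{hasTASFlavor: true, hasParentCohort: true, multiKueueChecks: 1}
	for _, m := range tasViolations(cq) {
		fmt.Println(m) // prints the cohort and MultiKueue messages
	}
}

The checks accumulate rather than short-circuit, which is why each integration test above can assert one exact condition message: every scenario violates exactly one rule.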