diff --git a/apis/kueue/v1alpha1/tas_types.go b/apis/kueue/v1alpha1/tas_types.go new file mode 100644 index 0000000000..0e6aa6ff8f --- /dev/null +++ b/apis/kueue/v1alpha1/tas_types.go @@ -0,0 +1,29 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +const ( + // WorkloadAnnotation is an annotation set on the Job's PodTemplate to + // indicate the name of the admitted Workload corresponding to the Job. The + // annotation is set when starting the Job, and removed on stopping the Job. + WorkloadAnnotation = "kueue.x-k8s.io/workload" + + // PodSetLabel is a label set on the Job's PodTemplate to indicate the name + // of the PodSet of the admitted Workload corresponding to the PodTemplate. + // The label is set when starting the Job, and removed on stopping the Job. + PodSetLabel = "kueue.x-k8s.io/podset" +) diff --git a/pkg/controller/jobframework/reconciler.go b/pkg/controller/jobframework/reconciler.go index 6e4b02aee8..3a02150171 100644 --- a/pkg/controller/jobframework/reconciler.go +++ b/pkg/controller/jobframework/reconciler.go @@ -39,6 +39,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" configapi "sigs.k8s.io/kueue/apis/config/v1beta1" + kueuealpha "sigs.k8s.io/kueue/apis/kueue/v1alpha1" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" "sigs.k8s.io/kueue/pkg/cache" "sigs.k8s.io/kueue/pkg/constants" @@ -973,7 +974,10 @@ func getPodSetsInfoFromStatus(ctx context.Context, c client.Client, w *kueue.Wor if err != nil { return nil, err } - + if features.Enabled(features.TopologyAwareScheduling) { + info.Labels[kueuealpha.PodSetLabel] = podSetFlavor.Name + info.Annotations[kueuealpha.WorkloadAnnotation] = w.Name + } for _, admissionCheck := range w.Status.AdmissionChecks { for _, podSetUpdate := range admissionCheck.PodSetUpdates { if podSetUpdate.Name == info.Name { diff --git a/pkg/controller/jobs/job/job_controller_test.go b/pkg/controller/jobs/job/job_controller_test.go index 15bae696be..9d6138fb84 100644 --- a/pkg/controller/jobs/job/job_controller_test.go +++ b/pkg/controller/jobs/job/job_controller_test.go @@ -33,10 +33,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/interceptor" "sigs.k8s.io/controller-runtime/pkg/reconcile" + kueuealpha "sigs.k8s.io/kueue/apis/kueue/v1alpha1" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" "sigs.k8s.io/kueue/pkg/constants" controllerconsts "sigs.k8s.io/kueue/pkg/controller/constants" "sigs.k8s.io/kueue/pkg/controller/jobframework" + "sigs.k8s.io/kueue/pkg/features" "sigs.k8s.io/kueue/pkg/podset" utiltesting "sigs.k8s.io/kueue/pkg/util/testing" utiltestingjob "sigs.k8s.io/kueue/pkg/util/testingjobs/job" @@ -416,6 +418,8 @@ func TestReconciler(t *testing.T) { PriorityValue(200) cases := map[string]struct { + enableTopologyAwareScheduling bool + reconcilerOptions []jobframework.Option job batchv1.Job workloads []kueue.Workload @@ -426,6 +430,36 @@ func TestReconciler(t *testing.T) { wantEvents []utiltesting.EventRecord wantErr error }{ + "PodSet label and Workload annotation are set when Job is starting; TopologyAwareScheduling enabled": { + enableTopologyAwareScheduling: true, + reconcilerOptions: []jobframework.Option{ + jobframework.WithManageJobsWithoutQueueName(true), + }, + job: *baseJobWrapper.DeepCopy(), + wantJob: *baseJobWrapper.Clone(). + Suspend(false). + PodLabel(kueuealpha.PodSetLabel, kueue.DefaultPodSetName). + PodAnnotation(kueuealpha.WorkloadAnnotation, "wl"). + Obj(), + workloads: []kueue.Workload{ + *baseWorkloadWrapper.Clone(). + Admitted(true). + Obj(), + }, + wantWorkloads: []kueue.Workload{ + *baseWorkloadWrapper.Clone(). + Admitted(true). + Obj(), + }, + wantEvents: []utiltesting.EventRecord{ + { + Key: types.NamespacedName{Name: "job", Namespace: "ns"}, + EventType: "Normal", + Reason: "Started", + Message: "Admitted by clusterQueue cq", + }, + }, + }, "when workload is created, it has its owner ProvReq annotations": { job: *baseJobWrapper.Clone(). SetAnnotation(controllerconsts.ProvReqAnnotationPrefix+"test-annotation", "test-val"). @@ -2713,6 +2747,7 @@ func TestReconciler(t *testing.T) { } for name, tc := range cases { t.Run(name, func(t *testing.T) { + features.SetFeatureGateDuringTest(t, features.TopologyAwareScheduling, tc.enableTopologyAwareScheduling) ctx, _ := utiltesting.ContextWithLog(t) clientBuilder := utiltesting.NewClientBuilder().WithInterceptorFuncs(interceptor.Funcs{SubResourcePatch: utiltesting.TreatSSAAsStrategicMerge}) if err := SetupIndexes(ctx, utiltesting.AsIndexer(clientBuilder)); err != nil { diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index e06da8d76c..d952be2e48 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -102,6 +102,13 @@ const ( // Enable more than one workload sharing flavors to preempt within a Cohort, // as long as the preemption targets don't overlap. MultiplePreemptions featuregate.Feature = "MultiplePreemptions" + + // owner: @mimowo + // alpha: v0.9 + // + // Enable Topology Aware Scheduling allowing to optimize placement of Pods + // to put them on closely located nodes (e.g. within the same rack or block). + TopologyAwareScheduling featuregate.Feature = "TopologyAwareScheduling" ) func init() { @@ -125,6 +132,7 @@ var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ LendingLimit: {Default: true, PreRelease: featuregate.Beta}, MultiKueueBatchJobWithManagedBy: {Default: false, PreRelease: featuregate.Alpha}, MultiplePreemptions: {Default: true, PreRelease: featuregate.Beta}, + TopologyAwareScheduling: {Default: false, PreRelease: featuregate.Alpha}, } func SetFeatureGateDuringTest(tb testing.TB, f featuregate.Feature, value bool) { diff --git a/site/content/en/docs/installation/_index.md b/site/content/en/docs/installation/_index.md index fecbb483bc..1aece6e21b 100644 --- a/site/content/en/docs/installation/_index.md +++ b/site/content/en/docs/installation/_index.md @@ -252,6 +252,7 @@ The currently supported features are: | `LendingLimit` | `true` | Beta | 0.9 | | | `MultiplePreemptions` | `false` | Alpha | 0.8 | 0.8 | | `MultiplePreemptions` | `true` | Beta | 0.9 | | +| `TopologyAwareScheduling` | `false` | Alpha | 0.9 | | ## What's next