From 864e36a01e2f3ed2c219162e9eab36403d43a005 Mon Sep 17 00:00:00 2001 From: Michal Wozniak Date: Tue, 15 Oct 2024 16:17:31 +0200 Subject: [PATCH] TAS: KEP update with API changes --- keps/2724-topology-aware-schedling/README.md | 123 ++++++++++++++----- 1 file changed, 90 insertions(+), 33 deletions(-) diff --git a/keps/2724-topology-aware-schedling/README.md b/keps/2724-topology-aware-schedling/README.md index 5a20749350..6f27de59be 100644 --- a/keps/2724-topology-aware-schedling/README.md +++ b/keps/2724-topology-aware-schedling/README.md @@ -334,7 +334,7 @@ the "cloud.provider.com/topology-rack" label, but in different blocks. type ResourceFlavorSpec struct { ... - // TopologyName indicates the name of the topology for the ResourceFlavor. + // topologyName indicates topology for the TAS ResourceFlavor. // When specified, it enables scraping of the topology information from the // nodes matching to the Resource Flavor node labels. // @@ -344,11 +344,12 @@ type ResourceFlavorSpec struct { // TopologySpec defines the desired state of Topology type TopologySpec struct { - // Levels defines the levels of topology. + // levels define the levels of topology. // + // +required // +listType=atomic // +kubebuilder:validation:MinItems=1 - // +kubebuilder:validation:MaxItems=5 + // +kubebuilder:validation:MaxItems=8 Levels []TopologyLevel `json:"levels,omitempty"` } @@ -397,18 +398,19 @@ PodTemplate level: ```golang const ( - // This annotation indicates that a PodSet requires Topology Aware Scheduling, - // and running all pods on nodes closely connected within the same level of - // hierarchy is a strong requirement for scheduling the workload. + // PodSetRequiredTopologyAnnotation indicates that a PodSet requires + // Topology Aware Scheduling, and requires scheduling all pods on nodes + // within the same topology domain corresponding to the topology level + // indicated by the annotation value (e.g. within a rack or within a block). 
PodSetRequiredTopologyAnnotation = "kueue.x-k8s.io/podset-required-topology" - // This annotation indicates that a PodSet requires Topology Aware Scheduling, - // but running all pods without the same topology level is a preference rather - // than requirement. + // PodSetPreferredTopologyAnnotation indicates that a PodSet requires + // Topology Aware Scheduling, but scheduling all pods on nodes + // within the same topology domain is a preference rather than a requirement. // // The levels are evaluated one-by-one going up from the level indicated by // the annotation. If the PodSet cannot fit within a given topology domain - // then the next topology level up is checked. If the PodSet cannot fit + // then the next topology level up is considered. If the PodSet cannot fit // at the highest topology level, then it gets admitted as distributed // among multiple topology domains. PodSetPreferredTopologyAnnotation = "kueue.x-k8s.io/podset-preferred-topology" @@ -439,20 +441,26 @@ Job level. ```golang type PodSet struct { ... - // TopologyRequest defines the topology requested for the corresponding PodSet. + // topologyRequest defines the topology request for the PodSet. + // // +optional TopologyRequest *PodSetTopologyRequest `json:"topologyRequest,omitempty"` } type PodSetTopologyRequest struct { - // Policy defines the policy used for TAS. Possible values are: - // - Preferred set when `kueue.x-k8s.io/podset-preferred-topology` annotation is set on the Job - // - Required set when `kueue.x-k8s.io/podset-required-topology` annotation is set on the Job - Policy TopologyRequestPolicy `json:"policy"` - - // Level indicated by the `kueue.x-k8s.io/podset-preferred-topology` or - // `kueue.x-k8s.io/podset-required-topology` annotation - Level string `json:"level"` + // required indicates the topology level required by the PodSet, as + // indicated by the `kueue.x-k8s.io/podset-required-topology` PodSet + // annotation. 
+ // + // +optional + Required *string `json:"required,omitempty"` + + // preferred indicates the topology level preferred by the PodSet, as + // indicated by the `kueue.x-k8s.io/podset-preferred-topology` PodSet + // annotation. + // + // +optional + Preferred *string `json:"preferred,omitempty"` } ``` @@ -463,28 +471,72 @@ at each topology level to the specific subset of nodes. type PodSetAssignment struct { ... - // TopologyAssignment indicates the resources assigned per topology level + // topologyAssignment indicates the topology assignment divided into + // topology domains corresponding to the lowest level of the topology. + // The assignment specifies the number of Pods to be scheduled per topology + // domain and specifies the node selectors for each topology domain, in the + // following way: the node selector keys are specified by the levels field + // (same for all domains), and the corresponding node selector value is + // specified by the domains.values subfield. + // + // Example: + // + // topologyAssignment: + // levels: + // - cloud.provider.com/topology-block + // - cloud.provider.com/topology-rack + // domains: + // - values: [block-1, rack-1] + // count: 4 + // - values: [block-1, rack-2] + // count: 2 + // + // Here: + // - 4 Pods are to be scheduled on nodes matching the node selector: + // cloud.provider.com/topology-block: block-1 + // cloud.provider.com/topology-rack: rack-1 + // - 2 Pods are to be scheduled on nodes matching the node selector: + // cloud.provider.com/topology-block: block-1 + // cloud.provider.com/topology-rack: rack-2 + // // +optional TopologyAssignment *TopologyAssignment `json:"topologyAssignment,omitempty"` } type TopologyAssignment struct { - // Groups contains the list of assignments split into groups corresponding - // to the same topology domain at the lowest level of the hierarchy. + // levels is an ordered list of keys denoting the levels of the assigned + // topology (i.e. 
node label keys), from the highest to the lowest level of + // the topology. + // // +required // +listType=atomic // +kubebuilder:validation:MinItems=1 - Groups []TopologyAssignmentGroup `json:"groups"` + // +kubebuilder:validation:MaxItems=8 + Levels []string `json:"levels"` + + // domains is a list of topology assignments split by topology domains at + // the lowest level of the topology. + // + // +required + Domains []TopologyDomainAssignment `json:"domains"` } -type TopologyAssignmentGroup struct { - // NodeLabels constitutes the nodeSelector for a given slice of pods. It - // defines values for all labels configured in the Topology.Levels. +type TopologyDomainAssignment struct { + // values is an ordered list of node selector values describing a topology + // domain. The values correspond to the consecutive topology levels, from + // the highest to the lowest. + // + // +required + // +listType=atomic // +kubebuilder:validation:MinItems=1 - NodeLabels map[string]string `json:"nodeLabels"` + // +kubebuilder:validation:MaxItems=8 + Values []string `json:"values"` - // Count indicates the number of pods in a given TopologyAssignmentGroup. + // count indicates the number of Pods to be scheduled in the topology + // domain indicated by the values field. + // // +required + // +kubebuilder:validation:Minimum=1 Count int32 `json:"count"` } ``` @@ -495,15 +547,20 @@ different values: ```golang const ( - // TopologySchedulingGate is used to delay topology assignment for pods - // once all the pods are created. + // TopologySchedulingGate is used to delay scheduling of a Pod until the + // nodeSelectors corresponding to the assigned topology domain are injected + // into the Pod. TopologySchedulingGate = "kueue.x-k8s.io/topology" - // WorkloadAnnotation indicates the name of the workload assigned. + // WorkloadAnnotation is an annotation set on the Job's PodTemplate to + // indicate the name of the admitted Workload corresponding to the Job. 
The + // annotation is set when starting the Job, and removed on stopping the Job. WorkloadAnnotation = "kueue.x-k8s.io/workload" - // PodSetLabel indicates the name of the PodSet in the workload - PodSeLabel = "kueue.x-k8s.io/podset" + // PodSetLabel is a label set on the Job's PodTemplate to indicate the name + // of the PodSet of the admitted Workload corresponding to the PodTemplate. + // The label is set when starting the Job, and removed on stopping the Job. + PodSetLabel = "kueue.x-k8s.io/podset" ) ```