From 6f45058918d08bc8d87f734eba4251d18eb3b748 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?=
Date: Mon, 4 Nov 2024 16:03:23 +0100
Subject: [PATCH] Review remarks

Co-authored-by: David Grove
Co-authored-by: Yuki Iwai
---
 .../concepts/topology_aware_scheduling.md   | 86 ++++---------------
 .../examples/tas/sample-job-preferred.yaml  | 24 ++++++
 site/static/examples/tas/sample-queues.yaml | 34 ++++++++
 3 files changed, 74 insertions(+), 70 deletions(-)
 create mode 100644 site/static/examples/tas/sample-job-preferred.yaml
 create mode 100644 site/static/examples/tas/sample-queues.yaml

diff --git a/site/content/en/docs/concepts/topology_aware_scheduling.md b/site/content/en/docs/concepts/topology_aware_scheduling.md
index aba7ea5e80..b75d15c59f 100644
--- a/site/content/en/docs/concepts/topology_aware_scheduling.md
+++ b/site/content/en/docs/concepts/topology_aware_scheduling.md
@@ -9,9 +9,9 @@ description: >
 {{< feature-state state="alpha" for_version="v0.9" >}}
 
 It is common that AI/ML workloads require a significant amount of pod-to-pod
-communication, and thus the network bendwidth between the running Pods
+communication. Therefore, the network bandwidth between the running Pods
 translates into the workload execution time, and the cost of running
-such workloads. Then, the connectivity between the Pods depends on the placement
+such workloads. The available bandwidth between the Pods depends on the placement
 of the Nodes, running the Pods, in the data center.
 
 We observe that the data centers have a hierarchical structure of their
@@ -25,7 +25,7 @@ blocks are more distant than two nodes within the same block.
 In this feature (called Topology Aware Scheduling, or TAS for short) we
 introduce a convention to represent the
 [hierarchical node topology information](#node-topology-information), and a set
-of APIs for Kueue administrators and users to utilize the information in order
+of APIs for Kueue administrators and users to utilize the information
 to optimize the Pod placement.
 
 ### Node topology information
@@ -39,7 +39,7 @@ which identifies uniquely its location in the tree structure. We do not assume
 global uniqueness of labels on each level, i.e. there could be two nodes with
 the same "rack" label, but in different "blocks".
 
-For example, this is a representation of the dataset hierarchy;
+For example, this is a representation of the data center hierarchy:
 
 | node   | cloud.provider.com/topology-block | cloud.provider.com/topology-rack |
 |:------:|:----------------------------------:|:--------------------------------:|
@@ -51,6 +51,11 @@ For example, this is a representation of the dataset hierarchy;
 Note that, there is a pair of nodes, node-1 and node-3, with the same value of
 the "cloud.provider.com/topology-rack" label, but in different blocks.
 
+{{% alert title="Note" color="primary" %}}
+TAS only includes Nodes with the `Ready=True` condition when aggregating the Node
+capacity for scheduling in each topology domain.
+{{% /alert %}}
+
 ### Admin-facing APIs
 
 As an admin, in order to enable the feature you need to:
@@ -61,48 +66,13 @@ As an admin, in order to enable the feature you need to:
 
 #### Example
 
-```yaml
-apiVersion: kueue.x-k8s.io/v1alpha1
-kind: Topology
-metadata:
-  name: "default"
-spec:
-  levels:
-  - nodeLabel: "cloud.provider.com/topology-block"
-  - nodeLabel: "cloud.provider.com/topology-rack"
-  - nodeLabel: "kubernetes.io/hostname"
----
-kind: ResourceFlavor
-apiVersion: kueue.x-k8s.io/v1beta1
-metadata:
-  name: "tas-flavor"
-spec:
-  nodeLabels:
-    cloud.provider.com/: "tas-node-group"
-  topologyName: "default"
----
-apiVersion: kueue.x-k8s.io/v1beta1
-kind: ClusterQueue
-metadata:
-  name: "tas-cluster-queue"
-spec:
-  namespaceSelector: {} # match all.
-  resourceGroups:
-  - coveredResources: ["cpu", "memory"]
-    flavors:
-    - name: "tas-flavor"
-      resources:
-      - name: "cpu"
-        nominalQuota: 100
-      - name: "memory"
-        nominalQuota: 100Gi
-```
+{{< include "examples/tas/sample-queues.yaml" "yaml" >}}
 
 ### User-facing APIs
 
 Once TAS is configured and ready to be used, you can create Jobs with the
 following annotations set at the PodTemplate level:
-- `kueue.x-k8s.io/podset-required-topology` - indicates that a PodSet requires
+- `kueue.x-k8s.io/podset-preferred-topology` - indicates that a PodSet requires
 Topology Aware Scheduling, but scheduling all pods within pods on nodes within
 the same topology domain is a preference rather than requirement. The levels
 are evaluated one-by-one going up from the level indicated by
@@ -121,32 +91,7 @@ Here is an example Job a user might submit to use TAS. It assumes there exists
 a LocalQueue named `tas-user-queue` which refernces the ClusterQueue pointing
 to a TAS ResourceFlavor.
 
-```yaml
-apiVersion: batch/v1
-kind: Job
-metadata:
-  generateName: tas-sample-big-preferred-host
-  labels:
-    kueue.x-k8s.io/queue-name: tas-user-queue
-spec:
-  parallelism: 40
-  completions: 40
-  completionMode: Indexed
-  template:
-    metadata:
-      annotations:
-        kueue.x-k8s.io/podset-preferred-topology: "cloud.provider.com/topology-block"
-    spec:
-      containers:
-      - name: dummy-job
-        image: gcr.io/k8s-staging-perf-tests/sleep:v0.1.0
-        args: ["300s"]
-        resources:
-          requests:
-            cpu: "1"
-            memory: "200Mi"
-      restartPolicy: Never
-```
+{{< include "examples/tas/sample-job-preferred.yaml" "yaml" >}}
 
 ### Limitations
 
@@ -154,9 +99,10 @@ Currently, there are multiple limitations for the compatibility of the feature
 with other features. In particular, a ClusterQueue referencing a TAS Resource
 Flavor (with the `.spec.topologyName` field) is marked as inactive in the
 following scenarios:
-- the CQ is in cohort
-- the CQ is using preemption
-- the CQ is using MultiKueue or ProvisioningRequest admission checks
+- the CQ is in a cohort (`.spec.cohort` is set)
+- the CQ is using [preemption](preemption.md)
+- the CQ is using [MultiKueue](multikueue.md) or
+  [ProvisioningRequest](/docs/admission-check-controllers/provisioning/) admission checks
 
 These usage scenarios are considered to be supported in the future releases
 of Kueue.
diff --git a/site/static/examples/tas/sample-job-preferred.yaml b/site/static/examples/tas/sample-job-preferred.yaml
new file mode 100644
index 0000000000..3a0674bfce
--- /dev/null
+++ b/site/static/examples/tas/sample-job-preferred.yaml
@@ -0,0 +1,24 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  generateName: tas-sample-preferred
+  labels:
+    kueue.x-k8s.io/queue-name: tas-user-queue
+spec:
+  parallelism: 40
+  completions: 40
+  completionMode: Indexed
+  template:
+    metadata:
+      annotations:
+        kueue.x-k8s.io/podset-preferred-topology: "cloud.provider.com/topology-block"
+    spec:
+      containers:
+      - name: dummy-job
+        image: gcr.io/k8s-staging-perf-tests/sleep:v0.1.0
+        args: ["300s"]
+        resources:
+          requests:
+            cpu: "1"
+            memory: "200Mi"
+      restartPolicy: Never
\ No newline at end of file
diff --git a/site/static/examples/tas/sample-queues.yaml b/site/static/examples/tas/sample-queues.yaml
new file mode 100644
index 0000000000..56c1ae68db
--- /dev/null
+++ b/site/static/examples/tas/sample-queues.yaml
@@ -0,0 +1,34 @@
+apiVersion: kueue.x-k8s.io/v1alpha1
+kind: Topology
+metadata:
+  name: "default"
+spec:
+  levels:
+  - nodeLabel: "cloud.provider.com/topology-block"
+  - nodeLabel: "cloud.provider.com/topology-rack"
+  - nodeLabel: "kubernetes.io/hostname"
+---
+kind: ResourceFlavor
+apiVersion: kueue.x-k8s.io/v1beta1
+metadata:
+  name: "tas-flavor"
+spec:
+  nodeLabels:
+    cloud.provider.com/node-group: "tas-node-group" # assumed full label key; substitute the node-group label used in your cluster
+  topologyName: "default"
+---
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: ClusterQueue
+metadata:
+  name: "tas-cluster-queue"
+spec:
+  namespaceSelector: {} # match all.
+  resourceGroups:
+  - coveredResources: ["cpu", "memory"]
+    flavors:
+    - name: "tas-flavor"
+      resources:
+      - name: "cpu"
+        nominalQuota: 100
+      - name: "memory"
+        nominalQuota: 100Gi
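The sample Job above is labeled with `kueue.x-k8s.io/queue-name: tas-user-queue`, but neither sample manifest defines that LocalQueue. A minimal sketch of the missing piece could look like the following; the `default` namespace is an assumption, and the queue should live in whichever namespace the Jobs are submitted to.

```yaml
# Hypothetical companion to sample-queues.yaml (not part of the patch above):
# a LocalQueue named "tas-user-queue" that the sample Job's queue-name label
# expects to exist, pointing at the TAS ClusterQueue defined above.
apiVersion: kueue.x-k8s.io/v1beta1
kind: LocalQueue
metadata:
  namespace: "default"   # assumption; use the namespace where Jobs are created
  name: "tas-user-queue"
spec:
  clusterQueue: "tas-cluster-queue"
```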