intel · uniemimu · Mar 19, 2024 · Jan 4, 2024 · Mar 13, 2024 · Mar 18, 2024
diff --git a/gpu-aware-scheduling/README.md b/gpu-aware-scheduling/README.md
@@ -16,7 +16,7 @@ The typical use-case which GAS solves can be described with the following imagin
 4) Kubernetes Scheduler, if left to its own decision making, can place all the PODs on this one node with only 2 GPUs, since it only considers the memory amount of 16GB. However to be able to do such a deployment and run the PODs successfully, the last instance would need to allocate the GPU memory from two of the GPUs.
 5) GAS solves this issue by keeping book of the individual GPU memory amount. After two PODs have been deployed on the node, both GPUs have 3GB of free memory left. When GAS sees that the need for memory is 5GB but none of the GPUs in that node have as much (even though combined there is still 6GB left) it will filter out that node from the list of the nodes k8s scheduler proposed. The POD will not be deployed on that node.
 
-GAS tries to be agnostic about resource types. It doesn't try to have an understanding of the meaning of the resources, they are just numbers to it, which it identifies from other Kubernetes extended resources with the prefix `gpu.intel.com/`. The only resource treated differently is the GPU-plugin `i915`-resource, which is considered to describe "from how many GPUs the GPU-resources for the POD should be evenly consumed". That is, if each GPU has e.g. capacity of 1000 `gpu.intel.com/millicores", and POD spec has a limit for two (2) `gpu.intel.com/i915` and 2000 "gpu.intel.com/millicores`, that POD will consume 1000 millicores from two GPUs, totaling 2000 millicores. After GAS has calculated the resource requirement per GPU by dividing the extended resource numbers with the number of requested `i915`, deploying the POD to a node is only allowed if there are enough resources in the node to satisfy fully the per-GPU resource requirement in as many GPUs as requested in `i915` resource. Typical PODs use just one `i915` and consume resources from only a single GPU.
+GAS tries to be agnostic about resource types. It doesn't try to have an understanding of the meaning of the resources, they are just numbers to it, which it identifies from other Kubernetes extended resources with the prefix `gpu.intel.com/`. The only resources treated differently is the GPU-plugin `i915` and `xe` resources, which are considered to describe "from how many GPUs the GPU-resources for the POD should be evenly consumed". That is, if each GPU has e.g. capacity of 1000 `gpu.intel.com/millicores", and POD spec has a limit for two (2) `gpu.intel.com/i915` and 2000 "gpu.intel.com/millicores`, that POD will consume 1000 millicores from two GPUs, totaling 2000 millicores. After GAS has calculated the resource requirement per GPU by dividing the extended resource numbers with the number of requested `i915` or `xe`, deploying the POD to a node is only allowed if there are enough resources in the node to satisfy fully the per-GPU resource requirement in as many GPUs as requested in `i915` or `xe` resource. Typical PODs use just one `i915` or `xe` and consume resources from only a single GPU. Note that a Pod must only request either `i915` or `xe` resources, but never both.
 
 GAS heavily utilizes annotations. It itself annotates PODs after making filtering decisions on them, with a precise timestamp at annotation named "gas-ts". The timestamp can then be used for figuring out the time-order of the GAS-made scheduling decision for example during the GPU-plugin resource allocation phase, if the GPU-plugin wants to know the order of GPU-resource consuming POD deploying inside the node. Another annotation which GAS adds is "gas-container-cards". It will have the names of the cards selected for the containers. Containers are separated by "|", and card names are separated by ",". Thus a two-container POD in which both containers use 2 GPUs, could get an annotation "card0,card1|card2,card3". These annotations are then consumed by the Intel GPU device plugin.
 
@@ -141,7 +141,7 @@ spec:
 ```
 
 There is one change to the yaml here:
-- A resources/limits entry requesting the resource `gpu.intel.com/i915` will make GAS take part in scheduling such deployment. If this resource is not requested, GAS will not be used during scheduling of the pod.
+- A resources/limits entry requesting the resource `gpu.intel.com/i915` will make GAS take part in scheduling such deployment. If this resource is not requested, GAS will not be used during scheduling of the pod. Note: the `gpu.intel.com/xe` resource is also supported and Pods using it will also be scheduled through GAS.
 
 ### Unsupported use-cases
 

diff --git a/gpu-aware-scheduling/deploy/extender-configuration/scheduler-config-tas+gas.yaml b/gpu-aware-scheduling/deploy/extender-configuration/scheduler-config-tas+gas.yaml
@@ -24,6 +24,8 @@ extenders:
     managedResources:
       - name: "gpu.intel.com/i915"
         ignoredByScheduler: false
+      - name: "gpu.intel.com/xe"
+        ignoredByScheduler: false
     ignorable: true
     tlsConfig:
       insecure: false

diff --git a/gpu-aware-scheduling/go.mod b/gpu-aware-scheduling/go.mod
@@ -3,33 +3,33 @@ module github.com/intel/platform-aware-scheduling/gpu-aware-scheduling
 go 1.21
 
 require (
-	github.com/intel/platform-aware-scheduling/extender v0.6.0
+	github.com/intel/platform-aware-scheduling/extender v0.7.0
 	github.com/pkg/errors v0.9.1
 	github.com/smartystreets/goconvey v1.8.1
-	github.com/stretchr/testify v1.8.4
-	k8s.io/api v0.28.4
-	k8s.io/apimachinery v0.28.4
-	k8s.io/client-go v0.28.4
-	k8s.io/klog/v2 v2.110.1
-	k8s.io/kube-scheduler v0.28.4
+	github.com/stretchr/testify v1.9.0
+	k8s.io/api v0.29.3
+	k8s.io/apimachinery v0.29.3
+	k8s.io/client-go v0.29.3
+	k8s.io/klog/v2 v2.120.1
+	k8s.io/kube-scheduler v0.29.3
 )
 
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
-	github.com/emicklei/go-restful/v3 v3.10.2 // indirect
-	github.com/evanphx/json-patch v5.6.0+incompatible // indirect
-	github.com/go-logr/logr v1.3.0 // indirect
-	github.com/go-openapi/jsonpointer v0.20.0 // indirect
-	github.com/go-openapi/jsonreference v0.20.2 // indirect
-	github.com/go-openapi/swag v0.22.4 // indirect
+	github.com/emicklei/go-restful/v3 v3.12.0 // indirect
+	github.com/evanphx/json-patch v5.9.0+incompatible // indirect
+	github.com/go-logr/logr v1.4.1 // indirect
+	github.com/go-openapi/jsonpointer v0.21.0 // indirect
+	github.com/go-openapi/jsonreference v0.21.0 // indirect
+	github.com/go-openapi/swag v0.23.0 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
-	github.com/golang/protobuf v1.5.3 // indirect
+	github.com/golang/protobuf v1.5.4 // indirect
 	github.com/google/gnostic-models v0.6.8 // indirect
-	github.com/google/go-cmp v0.5.9 // indirect
+	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/gofuzz v1.2.0 // indirect
-	github.com/google/uuid v1.3.0 // indirect
+	github.com/google/uuid v1.6.0 // indirect
 	github.com/gopherjs/gopherjs v1.17.2 // indirect
-	github.com/imdario/mergo v0.3.15 // indirect
+	github.com/imdario/mergo v0.3.16 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/jtolds/gls v4.20.0+incompatible // indirect
@@ -40,22 +40,21 @@ require (
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/smarty/assertions v1.15.1 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
-	github.com/stretchr/objx v0.5.0 // indirect
-	golang.org/x/net v0.17.0 // indirect
-	golang.org/x/oauth2 v0.10.0 // indirect
-	golang.org/x/sys v0.13.0 // indirect
-	golang.org/x/term v0.13.0 // indirect
-	golang.org/x/text v0.13.0 // indirect
-	golang.org/x/time v0.3.0 // indirect
-	golang.org/x/tools v0.11.1 // indirect
-	google.golang.org/appengine v1.6.7 // indirect
-	google.golang.org/protobuf v1.31.0 // indirect
+	github.com/stretchr/objx v0.5.2 // indirect
+	golang.org/x/net v0.22.0 // indirect
+	golang.org/x/oauth2 v0.18.0 // indirect
+	golang.org/x/sys v0.18.0 // indirect
+	golang.org/x/term v0.18.0 // indirect
+	golang.org/x/text v0.14.0 // indirect
+	golang.org/x/time v0.5.0 // indirect
+	google.golang.org/appengine v1.6.8 // indirect
+	google.golang.org/protobuf v1.33.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
-	k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 // indirect
-	k8s.io/utils v0.0.0-20230505201702-9f6742963106 // indirect
+	k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
+	k8s.io/utils v0.0.0-20240310230437-4693a0247e57 // indirect
 	sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
-	sigs.k8s.io/structured-merge-diff/v4 v4.3.0 // indirect
-	sigs.k8s.io/yaml v1.3.0 // indirect
+	sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
+	sigs.k8s.io/yaml v1.4.0 // indirect
 )