Skip to content

Commit

Permalink
Add variable for setting the GPUs on cluster
Browse files Browse the repository at this point in the history
The change also includes a patch to set the GPUs on
nutanixmachinetemplate resources for control plane and worker
machine deployments.
  • Loading branch information
thunderboltsid committed Apr 2, 2024
1 parent 95b88be commit 12dc352
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 15 deletions.
40 changes: 40 additions & 0 deletions templates/cluster-template-clusterclass.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,31 @@ spec:
- nutanix-quick-start-worker
enabledIf: '{{if .additionalCategories}}true{{end}}'
name: add-additional-categories
# Patch "add-gpus": adds the value of the "gpus" cluster variable at
# /spec/template/spec/gpus on the selected NutanixMachineTemplate resources.
- definitions:
# Definition 1: the NutanixMachineTemplate matched by controlPlane: true.
- jsonPatches:
- op: add
path: /spec/template/spec/gpus
valueFrom:
variable: gpus
selector:
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: NutanixMachineTemplate
matchResources:
controlPlane: true
# Definition 2: the NutanixMachineTemplate matched by the
# nutanix-quick-start-worker machine deployment class.
- jsonPatches:
- op: add
path: /spec/template/spec/gpus
valueFrom:
variable: gpus
selector:
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: NutanixMachineTemplate
matchResources:
machineDeploymentClass:
names:
- nutanix-quick-start-worker
# The template renders "true" only when the gpus variable is non-empty;
# otherwise it renders an empty string.
enabledIf: '{{if .gpus}}true{{end}}'
name: add-gpus
variables:
- name: sshKey
required: true
Expand Down Expand Up @@ -440,6 +465,21 @@ spec:
type: string
type: object
type: array
# Optional cluster variable "gpus": an array of objects, each with the
# properties deviceID (integer), name (string) and type (string).
# It is the value source for the add-gpus patch (valueFrom.variable: gpus).
- name: gpus
required: false
schema:
# NOTE(review): the add-gpus patch also selects the control plane machine
# template, so "in the machine deployment" may be narrower than the actual
# effect of this variable — confirm the intended wording.
description: List of GPUs to be used in the machine deployment in cluster.
openAPIV3Schema:
items:
properties:
# presumably "type" selects whether name or deviceID identifies the GPU
# — TODO confirm against the NutanixGPU API type.
deviceID:
type: integer
name:
type: string
type:
type: string
type: object
type: array
workers:
machineDeployments:
- class: nutanix-quick-start-worker
Expand Down
40 changes: 40 additions & 0 deletions templates/clusterclass/clusterclass.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,31 @@ spec:
path: /spec/template/spec/additionalCategories
valueFrom:
variable: additionalCategories
# Patch "add-gpus": adds the value of the "gpus" cluster variable at
# /spec/template/spec/gpus on the selected NutanixMachineTemplate resources.
- name: add-gpus
# The template renders "true" only when the gpus variable is non-empty;
# otherwise it renders an empty string.
enabledIf: "{{if .gpus}}true{{end}}"
definitions:
# Definition 1: the NutanixMachineTemplate matched by controlPlane: true.
- selector:
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: NutanixMachineTemplate
matchResources:
controlPlane: true
jsonPatches:
- op: add
path: /spec/template/spec/gpus
valueFrom:
variable: gpus
# Definition 2: the NutanixMachineTemplate matched by the
# nutanix-quick-start-worker machine deployment class.
- selector:
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: NutanixMachineTemplate
matchResources:
machineDeploymentClass:
names:
- nutanix-quick-start-worker
jsonPatches:
- op: add
path: /spec/template/spec/gpus
valueFrom:
variable: gpus
variables:
- name: sshKey
required: true
Expand Down Expand Up @@ -456,3 +481,18 @@ spec:
type: string
value:
type: string
# Optional cluster variable "gpus": an array of objects, each with the
# properties name (string), deviceID (integer) and type (string).
# It is the value source for the add-gpus patch (valueFrom.variable: gpus).
- name: gpus
required: false
schema:
openAPIV3Schema:
type: array
items:
type: object
properties:
# presumably "type" selects whether name or deviceID identifies the GPU
# — TODO confirm against the NutanixGPU API type.
name:
type: string
deviceID:
type: integer
type:
type: string
# NOTE(review): the add-gpus patch also selects the control plane machine
# template, so "in the machine deployment" may be narrower than the actual
# effect of this variable — confirm the intended wording.
description: List of GPUs to be used in the machine deployment in cluster.
42 changes: 27 additions & 15 deletions templates/template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/klog/v2/textlogger"
"k8s.io/utils/ptr"
capiv1 "sigs.k8s.io/cluster-api/api/v1beta1"
clusterctllog "sigs.k8s.io/cluster-api/cmd/clusterctl/log"
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
Expand Down Expand Up @@ -338,11 +339,6 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() {
err = clnt.Create(context.Background(), obj) // Create the cluster
Expect(err).NotTo(HaveOccurred())

Eventually(func() error {
_, err = fetchNutanixCluster(clnt, obj.GetName())
return err
}).Within(time.Minute).Should(Succeed())

Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) {
return fetchMachineTemplates(clnt, obj.GetName())
}).Within(time.Minute).Should(And(HaveLen(2),
Expand All @@ -361,11 +357,6 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() {
err = clnt.Create(context.Background(), obj) // Create the cluster
Expect(err).NotTo(HaveOccurred())

Eventually(func() error {
_, err = fetchNutanixCluster(clnt, obj.GetName())
return err
}).Within(time.Minute).Should(Succeed())

Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) {
return fetchMachineTemplates(clnt, obj.GetName())
}).Within(time.Minute).Should(And(HaveLen(2),
Expand All @@ -384,11 +375,6 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() {
err = clnt.Create(context.Background(), obj) // Create the cluster
Expect(err).NotTo(HaveOccurred())

Eventually(func() error {
_, err = fetchNutanixCluster(clnt, obj.GetName())
return err
}).Within(time.Minute).Should(Succeed())

Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) {
return fetchMachineTemplates(clnt, obj.GetName())
}).Within(time.Minute).Should(And(HaveLen(2),
Expand All @@ -400,4 +386,30 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() {
})))))
})
})

Describe("patches for GPUs", func() {
It("should have correct GPUs", func() {
clusterManifest := "testdata/cluster-with-gpu.yaml"
obj, err := getClusterManifest(clusterManifest)
Expect(err).NotTo(HaveOccurred())

err = clnt.Create(context.Background(), obj) // Create the cluster
Expect(err).NotTo(HaveOccurred())

Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) {
return fetchMachineTemplates(clnt, obj.GetName())
}).Within(time.Minute).Should(And(HaveLen(2),
HaveEach(HaveExistingField("Spec.Template.Spec.GPUs")),
HaveEach(HaveField("Spec.Template.Spec.GPUs", HaveLen(2))),
HaveEach(HaveField("Spec.Template.Spec.GPUs", ContainElement(v1beta1.NutanixGPU{
Type: v1beta1.NutanixGPUIdentifierName,
Name: ptr.To("fake-gpu"),
}))),
HaveEach(HaveField("Spec.Template.Spec.GPUs", ContainElement(v1beta1.NutanixGPU{
Type: v1beta1.NutanixGPUIdentifierDeviceID,
DeviceID: ptr.To(int64(42)),
}))),
))
})
})
})
58 changes: 58 additions & 0 deletions templates/testdata/cluster-with-gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Test fixture: a Cluster named cluster-with-gpu that uses the
# nutanix-quick-start class and sets the "gpus" topology variable.
# All endpoint, cluster, subnet and GPU names below are fake placeholder values.
apiVersion: cluster.x-k8s.io/v1beta1
kind: Cluster
metadata:
labels:
ccm: nutanix
cluster.x-k8s.io/cluster-name: cluster-with-gpu
name: cluster-with-gpu
spec:
topology:
class: nutanix-quick-start
controlPlane:
metadata: {}
replicas: 1
variables:
- name: sshKey
value: ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMe61GqA9gqeX3zDCiwuU8zEDt3ckLnfVm8ZxN7UuFyL user@host
- name: controlPlaneEndpoint
value:
IP: 1.2.3.4
port: 6443
- name: prismCentralEndpoint
value:
address: prismcentral.fake
credentialSecret: nutanix-quick-start-pc-creds
insecure: false
port: 9440
- name: controlPlaneMachineDetails
value:
bootType: legacy
clusterName: fake-cluster
imageName: ubuntu-2204-kube-v1.29.2.qcow2
memorySize: 4Gi
subnetName: fake-subnet
systemDiskSize: 40Gi
vcpuSockets: 2
vcpusPerSocket: 1
- name: workerMachineDetails
value:
bootType: legacy
clusterName: fake-cluster
imageName: ubuntu-2204-kube-v1.29.2.qcow2
memorySize: 4Gi
subnetName: fake-subnet
systemDiskSize: 40Gi
vcpuSockets: 2
vcpusPerSocket: 1
# Two GPU entries: one identified by name, one identified by device ID.
- name: gpus
value:
- type: name
name: fake-gpu
- type: deviceID
deviceID: 42
version: v1.29.2
workers:
machineDeployments:
- class: nutanix-quick-start-worker
name: md-0
replicas: 2

0 comments on commit 12dc352

Please sign in to comment.