diff --git a/templates/cluster-template-clusterclass.yaml b/templates/cluster-template-clusterclass.yaml index d95278cc98..8b43d95e90 100644 --- a/templates/cluster-template-clusterclass.yaml +++ b/templates/cluster-template-clusterclass.yaml @@ -297,6 +297,31 @@ spec: - nutanix-quick-start-worker enabledIf: '{{if .additionalCategories}}true{{end}}' name: add-additional-categories + - definitions: + - jsonPatches: + - op: add + path: /spec/template/spec/gpus + valueFrom: + variable: gpus + selector: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + matchResources: + controlPlane: true + - jsonPatches: + - op: add + path: /spec/template/spec/gpus + valueFrom: + variable: gpus + selector: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + matchResources: + machineDeploymentClass: + names: + - nutanix-quick-start-worker + enabledIf: '{{if .gpus}}true{{end}}' + name: add-gpus variables: - name: sshKey required: true @@ -432,6 +457,8 @@ spec: required: false schema: openAPIV3Schema: + description: Additional categories to be added to the machine deployment in + cluster. items: properties: key: @@ -440,6 +467,21 @@ spec: type: string type: object type: array + - name: gpus + required: false + schema: + openAPIV3Schema: + description: List of GPUs to be used in the machine deployment in cluster. + items: + properties: + deviceID: + type: integer + name: + type: string + type: + type: string + type: object + type: array workers: machineDeployments: - class: nutanix-quick-start-worker diff --git a/templates/clusterclass/clusterclass.yaml b/templates/clusterclass/clusterclass.yaml index 358734de9c..697903f61a 100644 --- a/templates/clusterclass/clusterclass.yaml +++ b/templates/clusterclass/clusterclass.yaml @@ -313,6 +313,31 @@ spec: path: /spec/template/spec/additionalCategories valueFrom: variable: additionalCategories + - name: add-gpus + enabledIf: "{{if .gpus}}true{{end}}" + definitions: + - selector: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + matchResources: + controlPlane: true + jsonPatches: + - op: add + path: /spec/template/spec/gpus + valueFrom: + variable: gpus + - selector: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + matchResources: + machineDeploymentClass: + names: + - nutanix-quick-start-worker + jsonPatches: + - op: add + path: /spec/template/spec/gpus + valueFrom: + variable: gpus variables: - name: sshKey required: true @@ -448,6 +473,7 @@ spec: required: false schema: openAPIV3Schema: + description: Additional categories to be added to the machine deployment in cluster. type: array items: type: object @@ -456,3 +482,18 @@ spec: type: string value: type: string + - name: gpus + required: false + schema: + openAPIV3Schema: + description: List of GPUs to be used in the machine deployment in cluster. + type: array + items: + type: object + properties: + name: + type: string + deviceID: + type: integer + type: + type: string diff --git a/templates/template_test.go b/templates/template_test.go index de7ef1d55c..fe2b436b7e 100644 --- a/templates/template_test.go +++ b/templates/template_test.go @@ -19,6 +19,7 @@ import ( "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/clientcmd" "k8s.io/klog/v2/textlogger" + "k8s.io/utils/ptr" capiv1 "sigs.k8s.io/cluster-api/api/v1beta1" clusterctllog "sigs.k8s.io/cluster-api/cmd/clusterctl/log" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" @@ -338,11 +339,6 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() { err = clnt.Create(context.Background(), obj) // Create the cluster Expect(err).NotTo(HaveOccurred()) - Eventually(func() error { - _, err = fetchNutanixCluster(clnt, obj.GetName()) - return err - }).Within(time.Minute).Should(Succeed()) - Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) { return fetchMachineTemplates(clnt, obj.GetName()) }).Within(time.Minute).Should(And(HaveLen(2), @@ -361,11 +357,6 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() { err = clnt.Create(context.Background(), obj) // Create the cluster Expect(err).NotTo(HaveOccurred()) - Eventually(func() error { - _, err = fetchNutanixCluster(clnt, obj.GetName()) - return err - }).Within(time.Minute).Should(Succeed()) - Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) { return fetchMachineTemplates(clnt, obj.GetName()) }).Within(time.Minute).Should(And(HaveLen(2), @@ -384,11 +375,6 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() { err = clnt.Create(context.Background(), obj) // Create the cluster Expect(err).NotTo(HaveOccurred()) - Eventually(func() error { - _, err = fetchNutanixCluster(clnt, obj.GetName()) - return err - }).Within(time.Minute).Should(Succeed()) - Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) { return fetchMachineTemplates(clnt, obj.GetName()) }).Within(time.Minute).Should(And(HaveLen(2), @@ -400,4 +386,30 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() { }))))) }) }) + + Describe("patches for GPUs", func() { + It("should have correct GPUs", func() { + clusterManifest := "testdata/cluster-with-gpu.yaml" + obj, err := getClusterManifest(clusterManifest) + Expect(err).NotTo(HaveOccurred()) + + err = clnt.Create(context.Background(), obj) // Create the cluster + Expect(err).NotTo(HaveOccurred()) + + Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) { + return fetchMachineTemplates(clnt, obj.GetName()) + }).Within(time.Minute).Should(And(HaveLen(2), + HaveEach(HaveExistingField("Spec.Template.Spec.GPUs")), + HaveEach(HaveField("Spec.Template.Spec.GPUs", HaveLen(2))), + HaveEach(HaveField("Spec.Template.Spec.GPUs", ContainElement(v1beta1.NutanixGPU{ + Type: v1beta1.NutanixGPUIdentifierName, + Name: ptr.To("fake-gpu"), + }))), + HaveEach(HaveField("Spec.Template.Spec.GPUs", ContainElement(v1beta1.NutanixGPU{ + Type: v1beta1.NutanixGPUIdentifierDeviceID, + DeviceID: ptr.To(int64(42)), + }))), + )) + }) + }) }) diff --git a/templates/testdata/cluster-with-gpu.yaml b/templates/testdata/cluster-with-gpu.yaml new file mode 100644 index 0000000000..73e8c8a10e --- /dev/null +++ b/templates/testdata/cluster-with-gpu.yaml @@ -0,0 +1,58 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + labels: + ccm: nutanix + cluster.x-k8s.io/cluster-name: cluster-with-gpu + name: cluster-with-gpu +spec: + topology: + class: nutanix-quick-start + controlPlane: + metadata: {} + replicas: 1 + variables: + - name: sshKey + value: ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMe61GqA9gqeX3zDCiwuU8zEDt3ckLnfVm8ZxN7UuFyL user@host + - name: controlPlaneEndpoint + value: + IP: 1.2.3.4 + port: 6443 + - name: prismCentralEndpoint + value: + address: prismcentral.fake + credentialSecret: nutanix-quick-start-pc-creds + insecure: false + port: 9440 + - name: controlPlaneMachineDetails + value: + bootType: legacy + clusterName: fake-cluster + imageName: ubuntu-2204-kube-v1.29.2.qcow2 + memorySize: 4Gi + subnetName: fake-subnet + systemDiskSize: 40Gi + vcpuSockets: 2 + vcpusPerSocket: 1 + - name: workerMachineDetails + value: + bootType: legacy + clusterName: fake-cluster + imageName: ubuntu-2204-kube-v1.29.2.qcow2 + memorySize: 4Gi + subnetName: fake-subnet + systemDiskSize: 40Gi + vcpuSockets: 2 + vcpusPerSocket: 1 + - name: gpus + value: + - type: name + name: fake-gpu + - type: deviceID + deviceID: 42 + version: v1.29.2 + workers: + machineDeployments: + - class: nutanix-quick-start-worker + name: md-0 + replicas: 2