diff --git a/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml b/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml index 6ed2906463..63fa6ac52c 100644 --- a/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml +++ b/parts/k8s/addons/kubernetesmasteraddons-nvidia-device-plugin-daemonset.yaml @@ -1,26 +1,42 @@ -apiVersion: extensions/v1beta1 +apiVersion: apps/v1 kind: DaemonSet metadata: labels: + k8s-app: nvidia-device-plugin kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile name: nvidia-device-plugin namespace: kube-system spec: + selector: + matchLabels: + k8s-app: nvidia-device-plugin + updateStrategy: + type: RollingUpdate template: metadata: - # Mark this pod as a critical add-on; when enabled, the critical add-on scheduler - # reserves resources for critical add-on pods so that they can be rescheduled after - # a failure. This annotation works in tandem with the toleration below. annotations: scheduler.alpha.kubernetes.io/critical-pod: "" labels: - name: nvidia-device-plugin-ds + k8s-app: nvidia-device-plugin spec: + priorityClassName: system-node-critical + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: accelerator + operator: In + values: + - nvidia tolerations: - # Allow this pod to be rescheduled while the node is in "critical add-ons only" mode. - # This, along with the annotation above marks this pod as a critical add-on. - key: CriticalAddonsOnly operator: Exists + - key: nvidia.com/gpu + effect: NoSchedule + operator: Equal + value: "true" containers: - image: name: nvidia-device-plugin-ctr diff --git a/parts/k8s/kubernetesagentcustomdata.yml b/parts/k8s/kubernetesagentcustomdata.yml index 7466b748fb..28f69f2084 100644 --- a/parts/k8s/kubernetesagentcustomdata.yml +++ b/parts/k8s/kubernetesagentcustomdata.yml @@ -35,14 +35,14 @@ write_files: "log-opts": { "max-size": "50m", "max-file": "5" - }{{if IsNVIDIADevicePluginEnabled}} + }{{if IsNSeriesSKU .}}{{if IsNVIDIADevicePluginEnabled}} ,"default-runtime": "nvidia", "runtimes": { "nvidia": { "path": "/usr/bin/nvidia-container-runtime", "runtimeArgs": [] } - }{{end}} + }{{end}}{{end}} } {{end}} diff --git a/parts/k8s/kubernetesbase.t b/parts/k8s/kubernetesbase.t index d9efe71ab2..dd00475992 100644 --- a/parts/k8s/kubernetesbase.t +++ b/parts/k8s/kubernetesbase.t @@ -41,6 +41,11 @@ {{range $index, $agent := .AgentPoolProfiles}} "{{.Name}}Index": {{$index}}, {{template "k8s/kubernetesagentvars.t" .}} + {{if IsNSeriesSKU .}} + {{if IsNVIDIADevicePluginEnabled}} + "registerWithGpuTaints": "nvidia.com/gpu=true:NoSchedule", + {{end}} + {{end}} {{if .IsStorageAccount}} {{if .HasDisks}} "{{.Name}}DataAccountName": "[concat(variables('storageAccountBaseName'), 'data{{$index}}')]", @@ -187,4 +192,4 @@ {{end}} } -} \ No newline at end of file +} diff --git a/pkg/acsengine/const.go b/pkg/acsengine/const.go index e626537a91..40ebf79e8f 100644 --- a/pkg/acsengine/const.go +++ b/pkg/acsengine/const.go @@ -136,8 +136,8 @@ const ( DefaultReschedulerAddonName = "rescheduler" // DefaultMetricsServerAddonName is the name of the kubernetes Metrics server addon deployment DefaultMetricsServerAddonName = "metrics-server" - // DefaultNVIDIADevicePluginAddonName is the name of the kubernetes NVIDIA Device Plugin daemon set - DefaultNVIDIADevicePluginAddonName = "nvidia-device-plugin" + // NVIDIADevicePluginAddonName is the name of the kubernetes NVIDIA Device Plugin daemon set + NVIDIADevicePluginAddonName = "nvidia-device-plugin" // ContainerMonitoringAddonName is the name of the kubernetes Container Monitoring addon deployment ContainerMonitoringAddonName = "container-monitoring" // AzureCNINetworkMonitoringAddonName is the name of the Azure CNI networkmonitor addon diff --git a/pkg/acsengine/defaults-apiserver.go b/pkg/acsengine/defaults-apiserver.go index 0a1b2bf862..34f278f1ee 100644 --- a/pkg/acsengine/defaults-apiserver.go +++ b/pkg/acsengine/defaults-apiserver.go @@ -143,7 +143,7 @@ func getDefaultAdmissionControls(cs *api.ContainerService) (string, string) { // Add new version case when applying admission controllers only available in that version or later switch { case common.IsKubernetesVersionGe(o.OrchestratorVersion, "1.9.0"): - admissionControlValues = "NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota,DenyEscalatingExec,AlwaysPullImages" + admissionControlValues = "NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota,DenyEscalatingExec,AlwaysPullImages,ExtendedResourceToleration" default: admissionControlValues = "NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota,DenyEscalatingExec,AlwaysPullImages" } diff --git a/pkg/acsengine/defaults-apiserver_test.go b/pkg/acsengine/defaults-apiserver_test.go index ee1520b331..7ea3d972d8 100644 --- a/pkg/acsengine/defaults-apiserver_test.go +++ b/pkg/acsengine/defaults-apiserver_test.go @@ -322,7 +322,7 @@ func TestAPIServerConfigDefaultAdmissionControls(t *testing.T) { admissonControlKey := "--admission-control" cs := createContainerService("testcluster", version, 3, 2) cs.Properties.OrchestratorProfile.KubernetesConfig.APIServerConfig = map[string]string{} - cs.Properties.OrchestratorProfile.KubernetesConfig.APIServerConfig[admissonControlKey] = "NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota,DenyEscalatingExec,AlwaysPullImages" + cs.Properties.OrchestratorProfile.KubernetesConfig.APIServerConfig[admissonControlKey] = "NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota,DenyEscalatingExec,AlwaysPullImages,ExtendedResourceToleration" setAPIServerConfig(cs) a := cs.Properties.OrchestratorProfile.KubernetesConfig.APIServerConfig diff --git a/pkg/acsengine/defaults.go b/pkg/acsengine/defaults.go index 2edb9318b3..29f0730d4b 100644 --- a/pkg/acsengine/defaults.go +++ b/pkg/acsengine/defaults.go @@ -307,10 +307,10 @@ var ( // DefaultNVIDIADevicePluginAddonsConfig is the default NVIDIA Device Plugin Kubernetes addon Config DefaultNVIDIADevicePluginAddonsConfig = api.KubernetesAddon{ - Name: DefaultNVIDIADevicePluginAddonName, + Name: NVIDIADevicePluginAddonName, Containers: []api.KubernetesContainerSpec{ { - Name: DefaultNVIDIADevicePluginAddonName, + Name: NVIDIADevicePluginAddonName, }, }, } @@ -453,7 +453,7 @@ func setOrchestratorDefaults(cs *api.ContainerService) { m = getAddonsIndexByName(o.KubernetesConfig.Addons, DefaultMetricsServerAddonName) o.KubernetesConfig.Addons[m].Enabled = k8sVersionMetricsServerAddonEnabled(o) } - n := getAddonsIndexByName(o.KubernetesConfig.Addons, DefaultNVIDIADevicePluginAddonName) + n := getAddonsIndexByName(o.KubernetesConfig.Addons, NVIDIADevicePluginAddonName) if n < 0 { // Provide default acs-engine config for NVIDIA Device Plugin o.KubernetesConfig.Addons = append(o.KubernetesConfig.Addons, DefaultNVIDIADevicePluginAddonsConfig) @@ -563,7 +563,7 @@ func setOrchestratorDefaults(cs *api.ContainerService) { if a.OrchestratorProfile.KubernetesConfig.Addons[m].IsEnabled(api.DefaultMetricsServerAddonEnabled) { a.OrchestratorProfile.KubernetesConfig.Addons[m] = assignDefaultAddonVals(a.OrchestratorProfile.KubernetesConfig.Addons[m], DefaultMetricsServerAddonsConfig) } - n := getAddonsIndexByName(a.OrchestratorProfile.KubernetesConfig.Addons, DefaultNVIDIADevicePluginAddonName) + n := getAddonsIndexByName(a.OrchestratorProfile.KubernetesConfig.Addons, NVIDIADevicePluginAddonName) if a.OrchestratorProfile.KubernetesConfig.Addons[n].IsEnabled(api.DefaultNVIDIADevicePluginAddonEnabled) { a.OrchestratorProfile.KubernetesConfig.Addons[n] = assignDefaultAddonVals(a.OrchestratorProfile.KubernetesConfig.Addons[n], DefaultNVIDIADevicePluginAddonsConfig) } diff --git a/pkg/acsengine/k8s_versions.go b/pkg/acsengine/k8s_versions.go index f889136508..482c06fbb9 100644 --- a/pkg/acsengine/k8s_versions.go +++ b/pkg/acsengine/k8s_versions.go @@ -25,6 +25,7 @@ var k8sComponentVersions = map[string]map[string]string{ ContainerMonitoringAddonName: "oms:ciprod05082018", AzureCNINetworkMonitoringAddonName: "networkmonitor:v0.0.4", "cluster-autoscaler": "cluster-autoscaler:v1.3.0", + NVIDIADevicePluginAddonName: "k8s-device-plugin:1.10", "nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency, "nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod, "podeviction": DefaultKubernetesCtrlMgrPodEvictionTimeout, @@ -55,7 +56,7 @@ var k8sComponentVersions = map[string]map[string]string{ ContainerMonitoringAddonName: "oms:ciprod05082018", AzureCNINetworkMonitoringAddonName: "networkmonitor:v0.0.4", "cluster-autoscaler": "cluster-autoscaler:v1.2.2", - "nvidia-device-plugin": "k8s-device-plugin:1.10", + NVIDIADevicePluginAddonName: "k8s-device-plugin:1.10", "nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency, "nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod, "podeviction": DefaultKubernetesCtrlMgrPodEvictionTimeout, @@ -277,6 +278,7 @@ func getK8sVersionComponents(version string, overrides map[string]string) map[st "ratelimitbucket": k8sComponentVersions["1.11"]["ratelimitbucket"], "gchighthreshold": k8sComponentVersions["1.11"]["gchighthreshold"], "gclowthreshold": k8sComponentVersions["1.11"]["gclowthreshold"], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.11"][NVIDIADevicePluginAddonName], } case "1.10": ret = map[string]string{ @@ -311,7 +313,7 @@ func getK8sVersionComponents(version string, overrides map[string]string) map[st "gchighthreshold": k8sComponentVersions["1.10"]["gchighthreshold"], "gclowthreshold": k8sComponentVersions["1.10"]["gclowthreshold"], DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.10"]["cluster-autoscaler"], - DefaultNVIDIADevicePluginAddonName: k8sComponentVersions["1.10"]["nvidia-device-plugin"], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.10"][NVIDIADevicePluginAddonName], } case "1.9": ret = map[string]string{ diff --git a/pkg/acsengine/k8s_versions_test.go b/pkg/acsengine/k8s_versions_test.go index c4821ee55d..a63c2a4dd3 100644 --- a/pkg/acsengine/k8s_versions_test.go +++ b/pkg/acsengine/k8s_versions_test.go @@ -30,6 +30,7 @@ func TestGetK8sVersionComponents(t *testing.T) { ContainerMonitoringAddonName: k8sComponentVersions["1.11"][ContainerMonitoringAddonName], AzureCNINetworkMonitoringAddonName: k8sComponentVersions["1.11"][AzureCNINetworkMonitoringAddonName], DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.11"]["cluster-autoscaler"], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.11"][NVIDIADevicePluginAddonName], "nodestatusfreq": k8sComponentVersions["1.11"]["nodestatusfreq"], "nodegraceperiod": k8sComponentVersions["1.11"]["nodegraceperiod"], "podeviction": k8sComponentVersions["1.11"]["podeviction"], @@ -50,6 +51,51 @@ func TestGetK8sVersionComponents(t *testing.T) { } } + oneDotTenDotZero := getK8sVersionComponents("1.10.0", nil) + if oneDotTenDotZero == nil { + t.Fatalf("getK8sVersionComponents() should not return nil for valid version") + } + expected = map[string]string{ + "hyperkube": "hyperkube-amd64:v1.10.0", + "ccm": "cloud-controller-manager-amd64:v1.10.0", + "windowszip": "v1.10.0-1int.zip", + "dockerEngineVersion": k8sComponentVersions["1.10"]["dockerEngine"], + DefaultDashboardAddonName: k8sComponentVersions["1.10"]["dashboard"], + "exechealthz": k8sComponentVersions["1.10"]["exechealthz"], + "addonresizer": k8sComponentVersions["1.10"]["addon-resizer"], + "heapster": k8sComponentVersions["1.10"]["heapster"], + DefaultMetricsServerAddonName: k8sComponentVersions["1.10"]["metrics-server"], + "dns": k8sComponentVersions["1.10"]["kube-dns"], + "addonmanager": k8sComponentVersions["1.10"]["addon-manager"], + "dnsmasq": k8sComponentVersions["1.10"]["dnsmasq"], + "pause": k8sComponentVersions["1.10"]["pause"], + DefaultTillerAddonName: k8sComponentVersions["1.10"]["tiller"], + DefaultReschedulerAddonName: k8sComponentVersions["1.10"]["rescheduler"], + DefaultACIConnectorAddonName: k8sComponentVersions["1.10"]["aci-connector"], + ContainerMonitoringAddonName: k8sComponentVersions["1.10"][ContainerMonitoringAddonName], + AzureCNINetworkMonitoringAddonName: k8sComponentVersions["1.10"][AzureCNINetworkMonitoringAddonName], + DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.10"]["cluster-autoscaler"], + NVIDIADevicePluginAddonName: k8sComponentVersions["1.10"][NVIDIADevicePluginAddonName], + "nodestatusfreq": k8sComponentVersions["1.10"]["nodestatusfreq"], + "nodegraceperiod": k8sComponentVersions["1.10"]["nodegraceperiod"], + "podeviction": k8sComponentVersions["1.10"]["podeviction"], + "routeperiod": k8sComponentVersions["1.10"]["routeperiod"], + "backoffretries": k8sComponentVersions["1.10"]["backoffretries"], + "backoffjitter": k8sComponentVersions["1.10"]["backoffjitter"], + "backoffduration": k8sComponentVersions["1.10"]["backoffduration"], + "backoffexponent": k8sComponentVersions["1.10"]["backoffexponent"], + "ratelimitqps": k8sComponentVersions["1.10"]["ratelimitqps"], + "ratelimitbucket": k8sComponentVersions["1.10"]["ratelimitbucket"], + "gchighthreshold": k8sComponentVersions["1.10"]["gchighthreshold"], + "gclowthreshold": k8sComponentVersions["1.10"]["gclowthreshold"], + } + + for k, v := range oneDotTenDotZero { + if expected[k] != v { + t.Fatalf("getK8sVersionComponents() returned an unexpected map[string]string value for k8s 1.10.0: %s = %s", k, oneDotTenDotZero[k]) + } + } + oneDotNineDotThree := getK8sVersionComponents("1.9.3", nil) if oneDotNineDotThree == nil { t.Fatalf("getK8sVersionComponents() should not return nil for valid version") @@ -71,7 +117,7 @@ func TestGetK8sVersionComponents(t *testing.T) { DefaultTillerAddonName: k8sComponentVersions["1.9"]["tiller"], DefaultReschedulerAddonName: k8sComponentVersions["1.9"]["rescheduler"], DefaultACIConnectorAddonName: k8sComponentVersions["1.9"]["aci-connector"], - ContainerMonitoringAddonName: k8sComponentVersions["1.11"][ContainerMonitoringAddonName], + ContainerMonitoringAddonName: k8sComponentVersions["1.9"][ContainerMonitoringAddonName], AzureCNINetworkMonitoringAddonName: k8sComponentVersions["1.9"][AzureCNINetworkMonitoringAddonName], DefaultClusterAutoscalerAddonName: k8sComponentVersions["1.9"]["cluster-autoscaler"], "nodestatusfreq": k8sComponentVersions["1.9"]["nodestatusfreq"], diff --git a/pkg/acsengine/params_k8s.go b/pkg/acsengine/params_k8s.go index 1ca849050d..2dfdae469f 100644 --- a/pkg/acsengine/params_k8s.go +++ b/pkg/acsengine/params_k8s.go @@ -124,13 +124,13 @@ func assignKubernetesParameters(properties *api.Properties, parametersMap params addValue(parametersMap, "kubernetesMetricsServerSpec", cloudSpecConfig.KubernetesSpecConfig.KubernetesImageBase+KubeConfigs[k8sVersion][DefaultMetricsServerAddonName]) } } - nvidiaDevicePluginAddon := getAddonByName(properties.OrchestratorProfile.KubernetesConfig.Addons, DefaultNVIDIADevicePluginAddonName) - c = getAddonContainersIndexByName(nvidiaDevicePluginAddon.Containers, DefaultNVIDIADevicePluginAddonName) + nvidiaDevicePluginAddon := getAddonByName(properties.OrchestratorProfile.KubernetesConfig.Addons, NVIDIADevicePluginAddonName) + c = getAddonContainersIndexByName(nvidiaDevicePluginAddon.Containers, NVIDIADevicePluginAddonName) if c > -1 { if nvidiaDevicePluginAddon.Containers[c].Image != "" { addValue(parametersMap, "kubernetesNVIDIADevicePluginSpec", nvidiaDevicePluginAddon.Containers[c].Image) } else { - addValue(parametersMap, "kubernetesNVIDIADevicePluginSpec", cloudSpecConfig.KubernetesSpecConfig.NVIDIAImageBase+KubeConfigs[k8sVersion][DefaultNVIDIADevicePluginAddonName]) + addValue(parametersMap, "kubernetesNVIDIADevicePluginSpec", cloudSpecConfig.KubernetesSpecConfig.NVIDIAImageBase+KubeConfigs[k8sVersion][NVIDIADevicePluginAddonName]) } } containerMonitoringAddon := getAddonByName(properties.OrchestratorProfile.KubernetesConfig.Addons, ContainerMonitoringAddonName) diff --git a/pkg/acsengine/template_generator.go b/pkg/acsengine/template_generator.go index 9266e2c332..aefc02f023 100644 --- a/pkg/acsengine/template_generator.go +++ b/pkg/acsengine/template_generator.go @@ -188,6 +188,10 @@ func (t *TemplateGenerator) getTemplateFuncMap(cs *api.ContainerService) templat storagetier, _ := getStorageAccountType(profile.VMSize) buf.WriteString(fmt.Sprintf(",storageprofile=managed,storagetier=%s", storagetier)) } + if isNSeriesSKU(profile) { + accelerator := "nvidia" + buf.WriteString(fmt.Sprintf(",accelerator=%s", accelerator)) + } buf.WriteString(fmt.Sprintf(",kubernetes.azure.com/cluster=%s", rg)) for k, v := range profile.CustomNodeLabels { buf.WriteString(fmt.Sprintf(",%s=%s", k, v)) @@ -752,8 +756,8 @@ func (t *TemplateGenerator) getTemplateFuncMap(cs *api.ContainerService) templat rC := getAddonContainersIndexByName(reschedulerAddon.Containers, DefaultReschedulerAddonName) metricsServerAddon := getAddonByName(cs.Properties.OrchestratorProfile.KubernetesConfig.Addons, DefaultMetricsServerAddonName) mC := getAddonContainersIndexByName(metricsServerAddon.Containers, DefaultMetricsServerAddonName) - nvidiaDevicePluginAddon := getAddonByName(cs.Properties.OrchestratorProfile.KubernetesConfig.Addons, DefaultNVIDIADevicePluginAddonName) - nC := getAddonContainersIndexByName(nvidiaDevicePluginAddon.Containers, DefaultNVIDIADevicePluginAddonName) + nvidiaDevicePluginAddon := getAddonByName(cs.Properties.OrchestratorProfile.KubernetesConfig.Addons, NVIDIADevicePluginAddonName) + nC := getAddonContainersIndexByName(nvidiaDevicePluginAddon.Containers, NVIDIADevicePluginAddonName) switch attr { case "kubernetesHyperkubeSpec": val = cs.Properties.OrchestratorProfile.KubernetesConfig.KubernetesImageBase + KubeConfigs[k8sVersion]["hyperkube"] @@ -959,7 +963,7 @@ func (t *TemplateGenerator) getTemplateFuncMap(cs *api.ContainerService) templat val = nvidiaDevicePluginAddon.Containers[nC].Image } } else { - val = cloudSpecConfig.KubernetesSpecConfig.NVIDIAImageBase + KubeConfigs[k8sVersion][DefaultNVIDIADevicePluginAddonName] + val = cloudSpecConfig.KubernetesSpecConfig.NVIDIAImageBase + KubeConfigs[k8sVersion][NVIDIADevicePluginAddonName] } case "kubernetesReschedulerSpec": if rC > -1 { diff --git a/pkg/api/const.go b/pkg/api/const.go index ca5b112475..2cba4f69a6 100644 --- a/pkg/api/const.go +++ b/pkg/api/const.go @@ -119,8 +119,8 @@ const ( DefaultReschedulerAddonName = "rescheduler" // DefaultMetricsServerAddonName is the name of the kubernetes metrics server addon deployment DefaultMetricsServerAddonName = "metrics-server" - // DefaultNVIDIADevicePluginAddonName is the name of the NVIDIA device plugin addon deployment - DefaultNVIDIADevicePluginAddonName = "nvidia-device-plugin" + // NVIDIADevicePluginAddonName is the name of the NVIDIA device plugin addon deployment + NVIDIADevicePluginAddonName = "nvidia-device-plugin" // ContainerMonitoringAddonName is the name of the kubernetes Container Monitoring addon deployment ContainerMonitoringAddonName = "container-monitoring" // DefaultPrivateClusterEnabled determines the acs-engine provided default for enabling kubernetes Private Cluster diff --git a/pkg/api/types.go b/pkg/api/types.go index d486f04022..f443dc2a9b 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -943,7 +943,7 @@ func (p *Properties) IsNVIDIADevicePluginEnabled() bool { k := p.OrchestratorProfile.KubernetesConfig o := p.OrchestratorProfile for i := range k.Addons { - if k.Addons[i].Name == DefaultNVIDIADevicePluginAddonName { + if k.Addons[i].Name == NVIDIADevicePluginAddonName { nvidiaDevicePluginAddon = k.Addons[i] } } diff --git a/pkg/api/types_test.go b/pkg/api/types_test.go index 6a6d346587..5626976f0f 100644 --- a/pkg/api/types_test.go +++ b/pkg/api/types_test.go @@ -898,7 +898,7 @@ func TestIsNVIDIADevicePluginEnabled(t *testing.T) { p.AgentPoolProfiles[0].VMSize = "Standard_D2_v2" p.OrchestratorProfile.KubernetesConfig.Addons = []KubernetesAddon{ { - Name: DefaultNVIDIADevicePluginAddonName, + Name: NVIDIADevicePluginAddonName, Enabled: helpers.PointerToBool(false), }, } diff --git a/pkg/api/vlabs/validate.go b/pkg/api/vlabs/validate.go index 59395c5b13..baeca617fd 100644 --- a/pkg/api/vlabs/validate.go +++ b/pkg/api/vlabs/validate.go @@ -427,24 +427,26 @@ func (a *Properties) validateAddons() error { return fmt.Errorf("Cluster Autoscaler add-on can only be used with VirtualMachineScaleSets. Please specify \"availabilityProfile\": \"%s\"", VirtualMachineScaleSets) } case "nvidia-device-plugin": - version := common.RationalizeReleaseAndVersion( - a.OrchestratorProfile.OrchestratorType, - a.OrchestratorProfile.OrchestratorRelease, - a.OrchestratorProfile.OrchestratorVersion, - false) - if version == "" { - return fmt.Errorf("the following user supplied OrchestratorProfile configuration is not supported: OrchestratorType: %s, OrchestratorRelease: %s, OrchestratorVersion: %s. Please check supported Release or Version for this build of acs-engine", a.OrchestratorProfile.OrchestratorType, a.OrchestratorProfile.OrchestratorRelease, a.OrchestratorProfile.OrchestratorVersion) - } - sv, err := semver.Make(version) - if err != nil { - return fmt.Errorf("could not validate version %s", version) - } - minVersion, err := semver.Make("1.10.0") - if err != nil { - return fmt.Errorf("could not validate version") - } - if isNSeriesSKU && sv.LT(minVersion) { - return fmt.Errorf("NVIDIA Device Plugin add-on can only be used Kubernetes 1.10 or above. Please specify \"orchestratorRelease\": \"1.10\"") + if helpers.IsTrueBoolPointer(addon.Enabled) { + version := common.RationalizeReleaseAndVersion( + a.OrchestratorProfile.OrchestratorType, + a.OrchestratorProfile.OrchestratorRelease, + a.OrchestratorProfile.OrchestratorVersion, + false) + if version == "" { + return fmt.Errorf("the following user supplied OrchestratorProfile configuration is not supported: OrchestratorType: %s, OrchestratorRelease: %s, OrchestratorVersion: %s. Please check supported Release or Version for this build of acs-engine", a.OrchestratorProfile.OrchestratorType, a.OrchestratorProfile.OrchestratorRelease, a.OrchestratorProfile.OrchestratorVersion) + } + sv, err := semver.Make(version) + if err != nil { + return fmt.Errorf("could not validate version %s", version) + } + minVersion, err := semver.Make("1.10.0") + if err != nil { + return fmt.Errorf("could not validate version") + } + if isNSeriesSKU && sv.LT(minVersion) { + return fmt.Errorf("NVIDIA Device Plugin add-on can only be used Kubernetes 1.10 or above. Please specify \"orchestratorRelease\": \"1.10\"") + } } } }