Skip to content
This repository has been archived by the owner on Jan 11, 2023. It is now read-only.

Commit

Permalink
ensure N series clusters get aks-docker-engine (#4221)
Browse files Browse the repository at this point in the history
  • Loading branch information
jackfrancis committed Nov 9, 2018
1 parent 546e4c6 commit 1b07ace
Show file tree
Hide file tree
Showing 14 changed files with 370 additions and 78 deletions.
38 changes: 0 additions & 38 deletions pkg/acsengine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,44 +363,6 @@ func getDCOSDefaultRepositoryURL(orchestratorType string, orchestratorVersion st
return ""
}

// isNSeriesSKU reports whether the agent pool's VMSize is one of the GPU
// (N series) SKUs listed below. The comparison is an exact, case-sensitive
// string match; any other value, including the empty string, yields false.
// profile must be non-nil because its VMSize field is read unconditionally.
func isNSeriesSKU(profile *api.AgentPoolProfile) bool {
	/* If a new GPU sku becomes available, add a case to this switch, but only if you have a confirmation
	   that we have an agreement with NVIDIA for this specific gpu.
	*/
	// A constant switch avoids allocating a map on every call and the
	// redundant second lookup the map-based version performed.
	switch profile.VMSize {
	// K80
	case "Standard_NC6", "Standard_NC12", "Standard_NC24", "Standard_NC24r":
		return true
	// M60
	case "Standard_NV6", "Standard_NV12", "Standard_NV24", "Standard_NV24r":
		return true
	// P40
	case "Standard_ND6s", "Standard_ND12s", "Standard_ND24s", "Standard_ND24rs":
		return true
	// P100
	case "Standard_NC6s_v2", "Standard_NC12s_v2", "Standard_NC24s_v2", "Standard_NC24rs_v2":
		return true
	// V100
	case "Standard_NC6s_v3", "Standard_NC12s_v3", "Standard_NC24s_v3", "Standard_NC24rs_v3":
		return true
	}

	return false
}

func getDCOSCustomDataPublicIPStr(orchestratorType string, masterCount int) string {
if orchestratorType == api.DCOS {
var buf bytes.Buffer
Expand Down
9 changes: 5 additions & 4 deletions pkg/acsengine/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/Azure/acs-engine/pkg/acsengine/transform"
"github.com/Azure/acs-engine/pkg/api"
"github.com/Azure/acs-engine/pkg/api/common"
"github.com/Azure/acs-engine/pkg/api/v20160330"
"github.com/Azure/acs-engine/pkg/api/vlabs"
"github.com/Azure/acs-engine/pkg/i18n"
Expand Down Expand Up @@ -497,14 +498,14 @@ func TestIsNSeriesSKU(t *testing.T) {
}

for _, sku := range validSkus {
if !isNSeriesSKU(&api.AgentPoolProfile{VMSize: sku}) {
t.Fatalf("Expected isNSeriesSKU(%s) to be true", sku)
if !common.IsNvidiaEnabledSKU(sku) {
t.Fatalf("Expected common.IsNvidiaEnabledSKU(%s) to be true", sku)
}
}

for _, sku := range invalidSkus {
if isNSeriesSKU(&api.AgentPoolProfile{VMSize: sku}) {
t.Fatalf("Expected isNSeriesSKU(%s) to be false", sku)
if common.IsNvidiaEnabledSKU(sku) {
t.Fatalf("Expected common.IsNvidiaEnabledSKU(%s) to be false", sku)
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/acsengine/template_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ func (t *TemplateGenerator) getTemplateFuncMap(cs *api.ContainerService) templat
storagetier, _ := getStorageAccountType(profile.VMSize)
buf.WriteString(fmt.Sprintf(",storageprofile=managed,storagetier=%s", storagetier))
}
if isNSeriesSKU(profile) {
if common.IsNvidiaEnabledSKU(profile.VMSize) {
accelerator := "nvidia"
buf.WriteString(fmt.Sprintf(",accelerator=%s", accelerator))
}
Expand Down Expand Up @@ -786,7 +786,7 @@ func (t *TemplateGenerator) getTemplateFuncMap(cs *api.ContainerService) templat
return cs.Properties.IsNVIDIADevicePluginEnabled()
},
"IsNSeriesSKU": func(profile *api.AgentPoolProfile) bool {
return isNSeriesSKU(profile)
return common.IsNvidiaEnabledSKU(profile.VMSize)
},
"UseSinglePlacementGroup": func(profile *api.AgentPoolProfile) bool {
return *profile.SinglePlacementGroup
Expand Down
2 changes: 1 addition & 1 deletion pkg/api/addons.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func (cs *ContainerService) setAddonsConfig() {

defaultNVIDIADevicePluginAddonsConfig := KubernetesAddon{
Name: NVIDIADevicePluginAddonName,
Enabled: helpers.PointerToBool(IsNSeriesSKU(cs.Properties) && common.IsKubernetesVersionGe(o.OrchestratorVersion, "1.10.0")),
Enabled: helpers.PointerToBool(cs.Properties.HasNSeriesSKU() && common.IsKubernetesVersionGe(o.OrchestratorVersion, "1.10.0")),
Containers: []KubernetesContainerSpec{
{
Name: NVIDIADevicePluginAddonName,
Expand Down
149 changes: 149 additions & 0 deletions pkg/api/common/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,152 @@ func ValidateDNSPrefix(dnsName string) error {
}
return nil
}

// IsNvidiaEnabledSKU reports whether a VM SKU has nvidia driver support.
// vmSize is matched exactly (case-sensitive) against the SKUs listed below;
// any other value, including the empty string, yields false.
func IsNvidiaEnabledSKU(vmSize string) bool {
	/* If a new GPU sku becomes available, add a case to this switch, but only if you have a confirmation
	   that we have an agreement with NVIDIA for this specific gpu.
	*/
	// A constant switch avoids allocating a map on every call and the
	// redundant second lookup the map-based version performed.
	switch vmSize {
	// K80
	case "Standard_NC6", "Standard_NC12", "Standard_NC24", "Standard_NC24r":
		return true
	// M60
	case "Standard_NV6", "Standard_NV12", "Standard_NV24", "Standard_NV24r":
		return true
	// P40
	case "Standard_ND6s", "Standard_ND12s", "Standard_ND24s", "Standard_ND24rs":
		return true
	// P100
	case "Standard_NC6s_v2", "Standard_NC12s_v2", "Standard_NC24s_v2", "Standard_NC24rs_v2":
		return true
	// V100
	case "Standard_NC6s_v3", "Standard_NC12s_v3", "Standard_NC24s_v3", "Standard_NC24rs_v3":
		return true
	}

	return false
}

// GetNSeriesVMCasesForTesting returns a table of VM SKUs paired with whether
// or not we expect each of them to be nvidia-enabled. Every SKU appears
// exactly once; the table ends with negative cases (a non-GPU SKU, an
// unknown string, and the empty string).
func GetNSeriesVMCasesForTesting() []struct {
	VMSKU    string
	Expected bool
} {
	cases := []struct {
		VMSKU    string
		Expected bool
	}{
		// K80
		{"Standard_NC6", true},
		{"Standard_NC12", true},
		{"Standard_NC24", true},
		{"Standard_NC24r", true},
		// M60
		{"Standard_NV6", true},
		{"Standard_NV12", true},
		{"Standard_NV24", true},
		{"Standard_NV24r", true},
		// P40
		{"Standard_ND6s", true},
		{"Standard_ND12s", true},
		{"Standard_ND24s", true},
		{"Standard_ND24rs", true},
		// P100
		{"Standard_NC6s_v2", true},
		{"Standard_NC12s_v2", true},
		{"Standard_NC24s_v2", true},
		{"Standard_NC24rs_v2", true},
		// V100
		{"Standard_NC6s_v3", true},
		{"Standard_NC12s_v3", true},
		{"Standard_NC24s_v3", true},
		{"Standard_NC24rs_v3", true},
		// Negative cases
		{"Standard_D2_v2", false},
		{"gobledygook", false},
		{"", false},
	}

	return cases
}
11 changes: 11 additions & 0 deletions pkg/api/common/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,14 @@ func TestValidateDNSPrefix(t *testing.T) {
}
}
}

// TestIsNvidiaEnabledSKU runs IsNvidiaEnabledSKU over every case returned by
// GetNSeriesVMCasesForTesting and aborts on the first SKU whose result does
// not match the expected value.
func TestIsNvidiaEnabledSKU(t *testing.T) {
	for _, tc := range GetNSeriesVMCasesForTesting() {
		if got := IsNvidiaEnabledSKU(tc.VMSKU); got != tc.Expected {
			t.Fatalf("expected IsNvidiaEnabledSKU(%s) to return %t, but instead got %t", tc.VMSKU, tc.Expected, got)
		}
	}
}
30 changes: 20 additions & 10 deletions pkg/api/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -433,19 +433,29 @@ func (p *Properties) setAgentProfileDefaults(isUpgrade, isScale bool) {
profile.AcceleratedNetworkingEnabledWindows = helpers.PointerToBool(DefaultAcceleratedNetworkingWindowsEnabled)
}

if profile.Distro == "" && profile.OSType != Windows {
if p.OrchestratorProfile.IsKubernetes() {
if profile.OSDiskSizeGB != 0 && profile.OSDiskSizeGB < VHDDiskSizeAKS {
profile.Distro = Ubuntu
} else {
if IsNSeriesSKU(p) {
profile.Distro = AKSDockerEngine
if profile.OSType != Windows {
if profile.Distro == "" {
if p.OrchestratorProfile.IsKubernetes() {
if profile.OSDiskSizeGB != 0 && profile.OSDiskSizeGB < VHDDiskSizeAKS {
profile.Distro = Ubuntu
} else {
profile.Distro = AKS
if profile.IsNSeriesSKU() {
profile.Distro = AKSDockerEngine
} else {
profile.Distro = AKS
}
}
} else if !p.OrchestratorProfile.IsOpenShift() {
profile.Distro = Ubuntu
}
// Ensure distro is set properly for N Series SKUs, because
// (1) At present, "aks-docker-engine" and "ubuntu" are the only working distro bases for running GPU workloads on N Series SKUs
// (2) Previous versions of acs-engine had working implementations using the "aks" distro value,
// so we need to hard override it in order to produce a working cluster in upgrade/scale contexts
} else if p.OrchestratorProfile.IsKubernetes() && (isUpgrade || isScale) && profile.IsNSeriesSKU() {
if profile.Distro == AKS {
profile.Distro = AKSDockerEngine
}
} else if !p.OrchestratorProfile.IsOpenShift() {
profile.Distro = Ubuntu
}
}

Expand Down
Loading

0 comments on commit 1b07ace

Please sign in to comment.