Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use dockerd if requesting gpus #1588

Merged
merged 1 commit into from
Mar 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion pkg/cloudprovider/aws/amifamily/al2.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ func (a AL2) SSMAlias(version string, instanceType cloudprovider.InstanceType) s
// even if elements of those inputs are in differing orders,
// guaranteeing it won't cause spurious hash differences.
// AL2 userdata also works on Ubuntu
func (a AL2) UserData(kubeletConfig *v1alpha5.KubeletConfiguration, taints []core.Taint, labels map[string]string, caBundle *string) bootstrap.Bootstrapper {
func (a AL2) UserData(kubeletConfig *v1alpha5.KubeletConfiguration, taints []core.Taint, labels map[string]string, caBundle *string, instanceTypes []cloudprovider.InstanceType) bootstrap.Bootstrapper {
return bootstrap.EKS{
ContainerRuntime: a.containerRuntime(instanceTypes),
Options: bootstrap.Options{
ClusterName: a.Options.ClusterName,
ClusterEndpoint: a.Options.ClusterEndpoint,
Expand All @@ -61,6 +62,19 @@ func (a AL2) UserData(kubeletConfig *v1alpha5.KubeletConfiguration, taints []cor
}
}

// containerRuntime returns the container runtime name to hand to the EKS
// bootstrap script for the given instance types: "dockerd" when the instance
// type exposes NVIDIA GPU, AMD GPU or AWS Neuron resources, "containerd"
// otherwise. The AL2 EKS Optimized AMI does not support GPUs w/ containerd.
// This should be removed once the EKS Optimized AMI supports GPUs through
// containerd.
//
// Only the first instance type is inspected.
// NOTE(review): this presumes every instance type passed in together shares
// the same accelerator capabilities — confirm against the caller.
func (a AL2) containerRuntime(instanceTypes []cloudprovider.InstanceType) string {
	// Indexing an empty slice would panic; with nothing to inspect there is no
	// accelerator to accommodate, so fall back to the default runtime.
	if len(instanceTypes) == 0 {
		return "containerd"
	}
	instanceResources := instanceTypes[0].Resources()
	if resources.IsZero(instanceResources[v1alpha1.ResourceNVIDIAGPU]) &&
		resources.IsZero(instanceResources[v1alpha1.ResourceAMDGPU]) &&
		resources.IsZero(instanceResources[v1alpha1.ResourceAWSNeuron]) {
		return "containerd"
	}
	return "dockerd"
}

// DefaultBlockDeviceMappings returns the default block device mappings for the AMI Family
func (a AL2) DefaultBlockDeviceMappings() []*v1alpha1.BlockDeviceMapping {
return []*v1alpha1.BlockDeviceMapping{{
Expand Down
1 change: 1 addition & 0 deletions pkg/cloudprovider/aws/amifamily/bootstrap/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type Options struct {
Labels map[string]string `hash:"set"`
CABundle *string
AWSENILimitedPodDensity bool
ContainerRuntime *string
}

// Bootstrapper can be implemented to generate a bootstrap script
Expand Down
4 changes: 4 additions & 0 deletions pkg/cloudprovider/aws/amifamily/bootstrap/eksbootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

// EKS is a bootstrapper whose Script method renders user data from the
// embedded Options together with an optional container runtime selection.
type EKS struct {
// Options holds the shared bootstrap settings; its fields are promoted onto EKS.
Options
// ContainerRuntime, when non-empty, is written by Script as
// "--container-runtime <value>"; an empty string omits the flag.
// NOTE(review): Options also appears to declare a ContainerRuntime *string
// field, which this field would shadow — confirm which one is intended.
ContainerRuntime string
}

func (e EKS) Script() string {
Expand All @@ -45,6 +46,9 @@ func (e EKS) Script() string {
userData.WriteString(" \\\n--use-max-pods false")
kubeletExtraArgs += " --max-pods=110"
}
if e.ContainerRuntime != "" {
userData.WriteString(fmt.Sprintf(" \\\n--container-runtime %s", e.ContainerRuntime))
}
if kubeletExtraArgs = strings.Trim(kubeletExtraArgs, " "); len(kubeletExtraArgs) > 0 {
userData.WriteString(fmt.Sprintf(" \\\n--kubelet-extra-args '%s'", kubeletExtraArgs))
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cloudprovider/aws/amifamily/bottlerocket.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func (b Bottlerocket) SSMAlias(version string, instanceType cloudprovider.Instan
}

// UserData returns the default userdata script for the AMI Family
func (b Bottlerocket) UserData(kubeletConfig *v1alpha5.KubeletConfiguration, taints []core.Taint, labels map[string]string, caBundle *string) bootstrap.Bootstrapper {
func (b Bottlerocket) UserData(kubeletConfig *v1alpha5.KubeletConfiguration, taints []core.Taint, labels map[string]string, caBundle *string, _ []cloudprovider.InstanceType) bootstrap.Bootstrapper {
return bootstrap.Bottlerocket{
Options: bootstrap.Options{
ClusterName: b.Options.ClusterName,
Expand Down
4 changes: 2 additions & 2 deletions pkg/cloudprovider/aws/amifamily/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ type LaunchTemplate struct {

// AMIFamily can be implemented to override the default logic for generating dynamic launch template parameters
type AMIFamily interface {
UserData(kubeletConfig *v1alpha5.KubeletConfiguration, taints []core.Taint, labels map[string]string, caBundle *string) bootstrap.Bootstrapper
UserData(kubeletConfig *v1alpha5.KubeletConfiguration, taints []core.Taint, labels map[string]string, caBundle *string, instanceTypes []cloudprovider.InstanceType) bootstrap.Bootstrapper
SSMAlias(version string, instanceType cloudprovider.InstanceType) string
DefaultBlockDeviceMappings() []*v1alpha1.BlockDeviceMapping
DefaultMetadataOptions() *v1alpha1.MetadataOptions
Expand Down Expand Up @@ -99,7 +99,7 @@ func (r Resolver) Resolve(ctx context.Context, constraints *v1alpha1.Constraints
for amiID, instanceTypes := range amiIDs {
resolved := &LaunchTemplate{
Options: options,
UserData: amiFamily.UserData(constraints.KubeletConfiguration, constraints.Taints, options.Labels, options.CABundle),
UserData: amiFamily.UserData(constraints.KubeletConfiguration, constraints.Taints, options.Labels, options.CABundle, instanceTypes),
BlockDeviceMappings: constraints.BlockDeviceMappings,
MetadataOptions: constraints.MetadataOptions,
AMIID: amiID,
Expand Down
2 changes: 1 addition & 1 deletion pkg/cloudprovider/aws/amifamily/ubuntu.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func (u Ubuntu) SSMAlias(version string, instanceType cloudprovider.InstanceType
}

// UserData returns the default userdata script for the AMI Family
func (u Ubuntu) UserData(kubeletConfig *v1alpha5.KubeletConfiguration, taints []core.Taint, labels map[string]string, caBundle *string) bootstrap.Bootstrapper {
func (u Ubuntu) UserData(kubeletConfig *v1alpha5.KubeletConfiguration, taints []core.Taint, labels map[string]string, caBundle *string, _ []cloudprovider.InstanceType) bootstrap.Bootstrapper {
return bootstrap.EKS{
Options: bootstrap.Options{
ClusterName: u.Options.ClusterName,
Expand Down
2 changes: 1 addition & 1 deletion pkg/cloudprovider/aws/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ func (i *InstanceType) amdGPUs() resource.Quantity {
count := int64(0)
if i.GpuInfo != nil {
for _, gpu := range i.GpuInfo.Gpus {
if *gpu.Manufacturer == "NVIDIA" {
if *gpu.Manufacturer == "AMD" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

count += *gpu.Count
}
}
Expand Down
26 changes: 26 additions & 0 deletions pkg/cloudprovider/aws/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,32 @@ var _ = Describe("Allocation", func() {
Expect(string(userData)).To(ContainSubstring("--use-max-pods false"))
Expect(string(userData)).To(ContainSubstring("--max-pods=110"))
})
It("should specify --container-runtime containerd by default", func() {
	// A plain pod with no accelerator requests should produce exactly one launch template.
	pod := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, ProvisionerWithProvider(provisioner, provider), test.UnschedulablePod())[0]
	ExpectScheduled(ctx, env.Client, pod)
	Expect(fakeEC2API.CalledWithCreateLaunchTemplateInput.Cardinality()).To(Equal(1))
	input := fakeEC2API.CalledWithCreateLaunchTemplateInput.Pop().(*ec2.CreateLaunchTemplateInput)
	// Fail loudly on malformed base64 instead of asserting against partial output.
	userData, err := base64.StdEncoding.DecodeString(*input.LaunchTemplateData.UserData)
	Expect(err).NotTo(HaveOccurred())
	Expect(string(userData)).To(ContainSubstring("--container-runtime containerd"))
})
It("should specify --container-runtime dockerd when using GPUs", func() {
	// Requesting an NVIDIA GPU should make the generated user data select dockerd.
	pod := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, ProvisionerWithProvider(provisioner, provider), test.UnschedulablePod(test.PodOptions{
		ResourceRequirements: v1.ResourceRequirements{
			Requests: map[v1.ResourceName]resource.Quantity{
				v1.ResourceCPU:             resource.MustParse("1"),
				v1alpha1.ResourceNVIDIAGPU: resource.MustParse("1"),
			},
			Limits: map[v1.ResourceName]resource.Quantity{
				v1alpha1.ResourceNVIDIAGPU: resource.MustParse("1"),
			},
		},
	}))[0]
	ExpectScheduled(ctx, env.Client, pod)
	Expect(fakeEC2API.CalledWithCreateLaunchTemplateInput.Cardinality()).To(Equal(1))
	input := fakeEC2API.CalledWithCreateLaunchTemplateInput.Pop().(*ec2.CreateLaunchTemplateInput)
	// Fail loudly on malformed base64 instead of asserting against partial output.
	userData, err := base64.StdEncoding.DecodeString(*input.LaunchTemplateData.UserData)
	Expect(err).NotTo(HaveOccurred())
	Expect(string(userData)).To(ContainSubstring("--container-runtime dockerd"))
})
Context("Kubelet Args", func() {
It("should specify the --dns-cluster-ip flag when clusterDNSIP is set", func() {
provisioner.Spec.KubeletConfiguration = &v1alpha5.KubeletConfiguration{ClusterDNS: []string{"10.0.10.100"}}
Expand Down