-
Notifications
You must be signed in to change notification settings - Fork 980
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding initial support for bottlerocket without custom launch template #1110
Changes from all commits
ba99090
cd736bc
6de2379
0f8cafa
e6bc0e7
ee74210
64533d9
7640e6b
dbeddfa
6818aff
64b650a
e30936c
b504aee
586c42b
a89b7be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ import ( | |
"github.com/aws/aws-sdk-go/service/ssm/ssmiface" | ||
"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5" | ||
"github.com/aws/karpenter/pkg/cloudprovider" | ||
"github.com/aws/karpenter/pkg/cloudprovider/aws/apis/v1alpha1" | ||
"github.com/patrickmn/go-cache" | ||
"k8s.io/client-go/kubernetes" | ||
"knative.dev/pkg/logging" | ||
|
@@ -46,15 +47,15 @@ func NewAMIProvider(ssm ssmiface.SSMAPI, clientSet *kubernetes.Clientset) *AMIPr | |
} | ||
|
||
// Get returns a set of AMIIDs and corresponding instance types. AMI may vary due to architecture, accelerator, etc | ||
func (p *AMIProvider) Get(ctx context.Context, instanceTypes []cloudprovider.InstanceType) (map[string][]cloudprovider.InstanceType, error) { | ||
func (p *AMIProvider) Get(ctx context.Context, constraints *v1alpha1.Constraints, instanceTypes []cloudprovider.InstanceType) (map[string][]cloudprovider.InstanceType, error) { | ||
version, err := p.kubeServerVersion(ctx) | ||
if err != nil { | ||
return nil, fmt.Errorf("kube server version, %w", err) | ||
} | ||
// Separate instance types by unique queries | ||
amiQueries := map[string][]cloudprovider.InstanceType{} | ||
for _, instanceType := range instanceTypes { | ||
query := p.getSSMQuery(instanceType, version) | ||
query := p.getSSMQuery(ctx, constraints, instanceType, version) | ||
amiQueries[query] = append(amiQueries[query], instanceType) | ||
} | ||
// Separate instance types by unique AMIIDs | ||
|
@@ -83,16 +84,42 @@ func (p *AMIProvider) getAMIID(ctx context.Context, query string) (string, error | |
return ami, nil | ||
} | ||
|
||
func (p *AMIProvider) getSSMQuery(instanceType cloudprovider.InstanceType, version string) string { | ||
var amiSuffix string | ||
// getAL2Alias returns a properly-formatted alias for an Amazon Linux AMI from SSM | ||
func (p *AMIProvider) getAL2Alias(version string, instanceType cloudprovider.InstanceType) string { | ||
amiSuffix := "" | ||
if !instanceType.NvidiaGPUs().IsZero() || !instanceType.AWSNeurons().IsZero() { | ||
amiSuffix = "-gpu" | ||
} else if instanceType.Architecture() == v1alpha5.ArchitectureArm64 { | ||
amiSuffix = "-arm64" | ||
amiSuffix = fmt.Sprintf("-%s", instanceType.Architecture()) | ||
} | ||
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2%s/recommended/image_id", version, amiSuffix) | ||
} | ||
|
||
// getBottlerocketAlias returns a properly-formatted alias for a Bottlerocket AMI from SSM | ||
func (p *AMIProvider) getBottlerocketAlias(version string, instanceType cloudprovider.InstanceType) string { | ||
arch := "x86_64" | ||
if instanceType.Architecture() == v1alpha5.ArchitectureArm64 { | ||
arch = instanceType.Architecture() | ||
} | ||
return fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/%s/latest/image_id", version, arch) | ||
} | ||
|
||
func (p *AMIProvider) getSSMQuery(ctx context.Context, constraints *v1alpha1.Constraints, instanceType cloudprovider.InstanceType, version string) string { | ||
ssmQuery := p.getAL2Alias(version, instanceType) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add documentation changes as part of this PR? We should call out that you will be defaulted to the EKS-optimized AMI. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is optional - we can have this be a separate PR too. Just want to make sure we include it before we do another release :) |
||
if constraints.AMIFamily != nil { | ||
rayterrill marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if *constraints.AMIFamily == v1alpha1.OperatingSystemBottleRocket { | ||
ssmQuery = p.getBottlerocketAlias(version, instanceType) | ||
} else if *constraints.AMIFamily == v1alpha1.OperatingSystemEKSOptimized { | ||
ssmQuery = p.getAL2Alias(version, instanceType) | ||
} else { | ||
logging.FromContext(ctx).Warnf("AMIFamily was set, but was not one of %s or %s. Setting to %s as the default.", v1alpha1.OperatingSystemEKSOptimized, v1alpha1.OperatingSystemBottleRocket, v1alpha1.OperatingSystemEKSOptimized) | ||
ssmQuery = p.getAL2Alias(version, instanceType) | ||
} | ||
} | ||
|
||
return ssmQuery | ||
} | ||
|
||
func (p *AMIProvider) kubeServerVersion(ctx context.Context) (string, error) { | ||
if version, ok := p.cache.Get(kubernetesVersionCacheKey); ok { | ||
return version.(string), nil | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -100,15 +100,27 @@ func (p *LaunchTemplateProvider) Get(ctx context.Context, constraints *v1alpha1. | |
return nil, err | ||
} | ||
// Get constrained AMI ID | ||
amis, err := p.amiProvider.Get(ctx, instanceTypes) | ||
amis, err := p.amiProvider.Get(ctx, constraints, instanceTypes) | ||
if err != nil { | ||
return nil, err | ||
} | ||
// Construct launch templates | ||
launchTemplates := map[string][]cloudprovider.InstanceType{} | ||
for amiID, instanceTypes := range amis { | ||
// Get userData for Node | ||
userData, err := p.getUserData(ctx, constraints, instanceTypes, additionalLabels) | ||
var userData string | ||
if constraints.AMIFamily != nil { | ||
if strings.EqualFold(*constraints.AMIFamily, v1alpha1.OperatingSystemBottleRocket) { | ||
userData, err = p.getBottleRocketUserData(ctx, constraints, additionalLabels) | ||
} else if strings.EqualFold(*constraints.AMIFamily, v1alpha1.OperatingSystemEKSOptimized) { | ||
userData, err = p.getEKSOptimizedUserData(ctx, constraints, instanceTypes, additionalLabels) | ||
} else { | ||
logging.FromContext(ctx).Warnf("AMIFamily was set, but was not one of %s or %s. Setting to %s as the default.", v1alpha1.OperatingSystemEKSOptimized, v1alpha1.OperatingSystemBottleRocket, v1alpha1.OperatingSystemEKSOptimized) | ||
userData, err = p.getEKSOptimizedUserData(ctx, constraints, instanceTypes, additionalLabels) | ||
bwagner5 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} else { | ||
userData, err = p.getEKSOptimizedUserData(ctx, constraints, instanceTypes, additionalLabels) | ||
} | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
@@ -240,10 +252,52 @@ func sortedKeys(m map[string]string) []string { | |
return keys | ||
} | ||
|
||
// getUserData returns the exact same string for equivalent input, | ||
// even if elements of those inputs are in differeing orders, | ||
func (p *LaunchTemplateProvider) getBottleRocketUserData(ctx context.Context, constraints *v1alpha1.Constraints, additionalLabels map[string]string) (string, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you also support maxPods as that was functionality added in this PR. For BR, it should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could be a follow-up change too :) |
||
var userData string | ||
userData += fmt.Sprintf(`[settings.kubernetes] | ||
cluster-name = "%s" | ||
api-server = "%s" | ||
`, | ||
injection.GetOptions(ctx).ClusterName, | ||
injection.GetOptions(ctx).ClusterEndpoint) | ||
|
||
if constraints.KubeletConfiguration.ClusterDNS != nil { | ||
userData += fmt.Sprintf("cluster-dns-ip = \"%s\"", constraints.KubeletConfiguration.ClusterDNS) | ||
} | ||
|
||
caBundle, err := p.GetCABundle(ctx) | ||
bwagner5 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err != nil { | ||
return "", fmt.Errorf("getting ca bundle for user data, %w", err) | ||
} | ||
if caBundle != nil { | ||
userData += fmt.Sprintf("cluster-certificate = \"%s\"\n", *caBundle) | ||
} | ||
|
||
nodeLabelArgs := functional.UnionStringMaps(additionalLabels, constraints.Labels) | ||
if len(nodeLabelArgs) > 0 { | ||
userData += `[settings.kubernetes.node-labels] | ||
` | ||
for key, val := range nodeLabelArgs { | ||
userData += fmt.Sprintf("\"%s\" = \"%s\"", key, val) | ||
} | ||
} | ||
|
||
bwagner5 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if len(constraints.Taints) > 0 { | ||
userData += `[settings.kubernetes.node-taints] | ||
` | ||
sorted := sortedTaints(constraints.Taints) | ||
for _, taint := range sorted { | ||
userData += fmt.Sprintf("%s=%s:%s", taint.Key, taint.Value, taint.Effect) | ||
} | ||
} | ||
|
||
return base64.StdEncoding.EncodeToString([]byte(userData)), nil | ||
} | ||
|
||
// getEKSOptimizedUserData returns the exact same string for equivalent input, | ||
// even if elements of those inputs are in differing orders, | ||
// guaranteeing it won't cause spurious hash differences. | ||
func (p *LaunchTemplateProvider) getUserData(ctx context.Context, constraints *v1alpha1.Constraints, instanceTypes []cloudprovider.InstanceType, additionalLabels map[string]string) (string, error) { | ||
func (p *LaunchTemplateProvider) getEKSOptimizedUserData(ctx context.Context, constraints *v1alpha1.Constraints, instanceTypes []cloudprovider.InstanceType, additionalLabels map[string]string) (string, error) { | ||
var containerRuntimeArg string | ||
if !needsDocker(instanceTypes) { | ||
containerRuntimeArg = "--container-runtime containerd" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@suket22 , what about arm+gpus? https://aws.amazon.com/blogs/machine-learning/aws-and-nvidia-to-bring-arm-based-instances-with-gpus-to-the-cloud/. I don't see this here: https://docs.aws.amazon.com/eks/latest/userguide/retrieve-ami-id.html.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
EKS optimized AMIs for this combination doesn't exist right now