-
Notifications
You must be signed in to change notification settings - Fork 980
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding initial support for bottlerocket without custom launch template #1110
Changes from 5 commits
ba99090
cd736bc
6de2379
0f8cafa
e6bc0e7
ee74210
64533d9
7640e6b
dbeddfa
6818aff
64b650a
e30936c
b504aee
586c42b
a89b7be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ import ( | |
"github.com/aws/aws-sdk-go/service/ssm/ssmiface" | ||
"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5" | ||
"github.com/aws/karpenter/pkg/cloudprovider" | ||
"github.com/aws/karpenter/pkg/cloudprovider/aws/apis/v1alpha1" | ||
"github.com/patrickmn/go-cache" | ||
"k8s.io/client-go/kubernetes" | ||
"knative.dev/pkg/logging" | ||
|
@@ -46,15 +47,15 @@ func NewAMIProvider(ssm ssmiface.SSMAPI, clientSet *kubernetes.Clientset) *AMIPr | |
} | ||
|
||
// Get returns a set of AMIIDs and corresponding instance types. AMI may vary due to architecture, accelerator, etc | ||
func (p *AMIProvider) Get(ctx context.Context, instanceTypes []cloudprovider.InstanceType) (map[string][]cloudprovider.InstanceType, error) { | ||
func (p *AMIProvider) Get(ctx context.Context, constraints *v1alpha1.Constraints, instanceTypes []cloudprovider.InstanceType) (map[string][]cloudprovider.InstanceType, error) { | ||
version, err := p.kubeServerVersion(ctx) | ||
if err != nil { | ||
return nil, fmt.Errorf("kube server version, %w", err) | ||
} | ||
// Separate instance types by unique queries | ||
amiQueries := map[string][]cloudprovider.InstanceType{} | ||
for _, instanceType := range instanceTypes { | ||
query := p.getSSMQuery(instanceType, version) | ||
query := p.getSSMQuery(ctx, constraints, instanceType, version) | ||
amiQueries[query] = append(amiQueries[query], instanceType) | ||
} | ||
// Separate instance types by unique AMIIDs | ||
|
@@ -83,14 +84,29 @@ func (p *AMIProvider) getAMIID(ctx context.Context, query string) (string, error | |
return ami, nil | ||
} | ||
|
||
func (p *AMIProvider) getSSMQuery(instanceType cloudprovider.InstanceType, version string) string { | ||
func (p *AMIProvider) getSSMQuery(ctx context.Context, constraints *v1alpha1.Constraints, instanceType cloudprovider.InstanceType, version string) string { | ||
var amiSuffix string | ||
var arch string | ||
if !instanceType.NvidiaGPUs().IsZero() || !instanceType.AWSNeurons().IsZero() { | ||
amiSuffix = "-gpu" | ||
} else if instanceType.Architecture() == v1alpha5.ArchitectureArm64 { | ||
amiSuffix = "-arm64" | ||
arch = "arm64" | ||
} else if instanceType.Architecture() == v1alpha5.ArchitectureAmd64 { | ||
arch = "x86_64" | ||
} | ||
|
||
if constraints.AMIFamily != nil { | ||
rayterrill marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if *constraints.AMIFamily == v1alpha1.OperatingSystemBottleRocket { | ||
return fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/%s/latest/image_id", version, arch) | ||
} else if *constraints.AMIFamily == v1alpha1.OperatingSystemEKSOptimized { | ||
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2%s/recommended/image_id", version, amiSuffix) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could put this format string into a const since it's duplicated 3 times. Wouldn't want to accidentally typo one of those :D There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I'm following @bwagner5 - can you give me an example? Sorry mate still learning Go so maybe I'm just missing something simple - not sure how a const would work with interpolated values. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No worries! I had meant something like this:
However, after looking at this again, a better idea is to use a function. The constant alias format string may be confusing since it takes two parameters. Breaking them into distinct functions will hide the complexity of building up the alias, make it easier to test separately, and there's potentially more validation we could do in these funcs in the future. Can you do something like this?
This will encapsulate the aliasing logic (like knowing the arch determines the amiSuffix) into a function that returns the properly formatted alias. |
||
} else { | ||
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2%s/recommended/image_id", version, amiSuffix) | ||
} | ||
} else { | ||
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2%s/recommended/image_id", version, amiSuffix) | ||
} | ||
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2%s/recommended/image_id", version, amiSuffix) | ||
} | ||
|
||
func (p *AMIProvider) kubeServerVersion(ctx context.Context) (string, error) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -94,15 +94,26 @@ func (p *LaunchTemplateProvider) Get(ctx context.Context, constraints *v1alpha1. | |
return nil, err | ||
} | ||
// Get constrained AMI ID | ||
amis, err := p.amiProvider.Get(ctx, instanceTypes) | ||
amis, err := p.amiProvider.Get(ctx, constraints, instanceTypes) | ||
if err != nil { | ||
return nil, err | ||
} | ||
// Construct launch templates | ||
launchTemplates := map[string][]cloudprovider.InstanceType{} | ||
for amiID, instanceTypes := range amis { | ||
// Get userData for Node | ||
userData, err := p.getUserData(ctx, constraints, instanceTypes, additionalLabels) | ||
var userData string | ||
if constraints.AMIFamily != nil { | ||
if strings.EqualFold(*constraints.AMIFamily, v1alpha1.OperatingSystemBottleRocket) { | ||
userData, err = p.getBottleRocketUserData(ctx, constraints, additionalLabels) | ||
} else if strings.EqualFold(*constraints.AMIFamily, v1alpha1.OperatingSystemEKSOptimized) { | ||
userData, err = p.getEKSOptimizedUserData(ctx, constraints, instanceTypes, additionalLabels) | ||
} else { | ||
userData, err = p.getEKSOptimizedUserData(ctx, constraints, instanceTypes, additionalLabels) | ||
bwagner5 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} else { | ||
userData, err = p.getEKSOptimizedUserData(ctx, constraints, instanceTypes, additionalLabels) | ||
} | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
@@ -220,10 +231,43 @@ func sortedKeys(m map[string]string) []string { | |
return keys | ||
} | ||
|
||
// getUserData returns the exact same string for equivalent input, | ||
func (p *LaunchTemplateProvider) getBottleRocketUserData(ctx context.Context, constraints *v1alpha1.Constraints, additionalLabels map[string]string) (string, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you also support maxPods as that was functionality added in this PR. For BR, it should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could be a follow-up change too :) |
||
var userData string | ||
userData += fmt.Sprintf(`[settings.kubernetes] | ||
cluster-name = "%s" | ||
api-server = "%s" | ||
`, | ||
injection.GetOptions(ctx).ClusterName, | ||
injection.GetOptions(ctx).ClusterEndpoint) | ||
|
||
if constraints.KubeletConfiguration.ClusterDNS != nil { | ||
userData += fmt.Sprintf("cluster-dns-ip = \"%s\"", constraints.KubeletConfiguration.ClusterDNS) | ||
} | ||
|
||
caBundle, err := p.GetCABundle(ctx) | ||
bwagner5 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err != nil { | ||
return "", fmt.Errorf("getting ca bundle for user data, %w", err) | ||
} | ||
if caBundle != nil { | ||
userData += fmt.Sprintf("cluster-certificate = \"%s\"\n", *caBundle) | ||
} | ||
|
||
nodeLabelArgs := functional.UnionStringMaps(additionalLabels, constraints.Labels) | ||
if len(nodeLabelArgs) > 0 { | ||
userData += `[settings.kubernetes.node-labels] | ||
` | ||
for key, val := range nodeLabelArgs { | ||
userData += fmt.Sprintf("\"%s\" = \"%s\"", key, val) | ||
} | ||
} | ||
|
||
bwagner5 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return base64.StdEncoding.EncodeToString([]byte(userData)), nil | ||
} | ||
|
||
// getEKSOptimizedUserData returns the exact same string for equivalent input, | ||
// even if elements of those inputs are in differing orders, | ||
// guaranteeing it won't cause spurious hash differences. | ||
func (p *LaunchTemplateProvider) getUserData(ctx context.Context, constraints *v1alpha1.Constraints, instanceTypes []cloudprovider.InstanceType, additionalLabels map[string]string) (string, error) { | ||
func (p *LaunchTemplateProvider) getEKSOptimizedUserData(ctx context.Context, constraints *v1alpha1.Constraints, instanceTypes []cloudprovider.InstanceType, additionalLabels map[string]string) (string, error) { | ||
var containerRuntimeArg string | ||
if !needsDocker(instanceTypes) { | ||
containerRuntimeArg = "--container-runtime containerd" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@suket22 , what about arm+gpus? https://aws.amazon.com/blogs/machine-learning/aws-and-nvidia-to-bring-arm-based-instances-with-gpus-to-the-cloud/. I don't see this here: https://docs.aws.amazon.com/eks/latest/userguide/retrieve-ami-id.html.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
EKS optimized AMIs for this combination doesn't exist right now