Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for Security Group specification/override #474

Merged
merged 6 commits into from
Jun 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions pkg/cloudprovider/aws/ami.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package aws

import (
"context"
"fmt"
"strings"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ssm"
"github.com/aws/aws-sdk-go/service/ssm/ssmiface"
"github.com/patrickmn/go-cache"
"go.uber.org/zap"
"k8s.io/client-go/kubernetes"
)

const kubernetesVersionCacheKey = "kubernetesVersion"
njtran marked this conversation as resolved.
Show resolved Hide resolved

type AMIProvider struct {
cache *cache.Cache
ssm ssmiface.SSMAPI
clientSet *kubernetes.Clientset
}

func NewAMIProvider(ssm ssmiface.SSMAPI, clientSet *kubernetes.Clientset) *AMIProvider {
return &AMIProvider{
ssm: ssm,
clientSet: clientSet,
cache: cache.New(CacheTTL, CacheCleanupInterval),
}
}

func (p *AMIProvider) Get(ctx context.Context, constraints *Constraints) (string, error) {
version, err := p.kubeServerVersion()
if err != nil {
return "", fmt.Errorf("kube server version, %w", err)
}
name := fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/%s/latest/image_id", version, KubeToAWSArchitectures[*constraints.Architecture])
if id, ok := p.cache.Get(name); ok {
return id.(string), nil
}
output, err := p.ssm.GetParameterWithContext(ctx, &ssm.GetParameterInput{Name: aws.String(name)})
if err != nil {
return "", fmt.Errorf("getting ssm parameter, %w", err)
}
prateekgogia marked this conversation as resolved.
Show resolved Hide resolved
ami := aws.StringValue(output.Parameter.Value)
p.cache.Set(name, ami, CacheTTL)
zap.S().Debugf("Discovered ami %s for query %s", ami, name)
return ami, nil
}

func (p *AMIProvider) kubeServerVersion() (string, error) {
if version, ok := p.cache.Get(kubernetesVersionCacheKey); ok {
return version.(string), nil
}
serverVersion, err := p.clientSet.Discovery().ServerVersion()
if err != nil {
return "", err
}
version := fmt.Sprintf("%s.%s", serverVersion.Major, strings.TrimSuffix(serverVersion.Minor, "+"))
p.cache.Set(kubernetesVersionCacheKey, version, CacheTTL)
zap.S().Debugf("Discovered kubernetes version %s", version)
return version, nil
}
22 changes: 12 additions & 10 deletions pkg/cloudprovider/aws/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,19 @@ import (
"github.com/awslabs/karpenter/pkg/cloudprovider"
"github.com/awslabs/karpenter/pkg/cloudprovider/aws/utils"
"github.com/awslabs/karpenter/pkg/utils/project"
"github.com/patrickmn/go-cache"
"go.uber.org/zap"
v1 "k8s.io/api/core/v1"
"knative.dev/pkg/apis"
)

const (
// CacheTTL restricts QPS to AWS APIs to this interval for verifying setup resources.
CacheTTL = 5 * time.Minute
// CacheTTL restricts QPS to AWS APIs to this interval for verifying setup
// resources. This value represents the maximum eventual consistency between
// AWS actual state and the controller's ability to provision those
// resources. Cache hits enable faster provisioning and reduced API load on
// AWS APIs, which can have a serious import on performance and scalability.
// DO NOT CHANGE THIS VALUE WITHOUT DUE CONSIDERATION
CacheTTL = 60 * time.Second
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just noticed this we are setting cacheTTL at two places?

	p.cache.Set(name, ami, CacheTTL)

and here-

	return &AMIProvider{
		ssm:       ssm,
		clientSet: clientSet,
		cache:     cache.New(CacheTTL, CacheCleanupInterval),
	}

So may be we can just set the smaller cacheTTL for security groups instead of changing for all resources? WDYT?

// CacheCleanupInterval triggers cache cleanup (lazy eviction) at this interval.
CacheCleanupInterval = 10 * time.Minute
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realize this was set in a previous PR, but I wanted to check if there was a specific reason we pick 10 minutes here and TTL as 60 seconds above?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No reason. Just a shot in the dark.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This value was lowered to reduce @prateekgogia's concerns about 5 minutes being a long time to start using security groups and subnets after they're created.

// ClusterTagKeyFormat is set on all Kubernetes owned resources.
Expand Down Expand Up @@ -76,13 +80,11 @@ func NewCloudProvider(options cloudprovider.Options) *CloudProvider {
ec2api := ec2.New(sess)
return &CloudProvider{
nodeAPI: &NodeFactory{ec2api: ec2api},
launchTemplateProvider: &LaunchTemplateProvider{
ec2api: ec2api,
cache: cache.New(CacheTTL, CacheCleanupInterval),
securityGroupProvider: NewSecurityGroupProvider(ec2api),
ssm: ssm.New(sess),
clientSet: options.ClientSet,
},
launchTemplateProvider: NewLaunchTemplateProvider(
ec2api,
NewAMIProvider(ssm.New(sess), options.ClientSet),
NewSecurityGroupProvider(ec2api),
),
subnetProvider: NewSubnetProvider(ec2api),
instanceTypeProvider: NewInstanceTypeProvider(ec2api),
instanceProvider: &InstanceProvider{ec2api: ec2api},
Expand Down
36 changes: 28 additions & 8 deletions pkg/cloudprovider/aws/constraints.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,16 @@ var (
LaunchTemplateVersionLabel = AWSLabelPrefix + "launch-template-version"
SubnetNameLabel = AWSLabelPrefix + "subnet-name"
SubnetTagKeyLabel = AWSLabelPrefix + "subnet-tag-key"
SecurityGroupNameLabel = AWSLabelPrefix + "security-group-name"
SecurityGroupTagKeyLabel = AWSLabelPrefix + "security-group-tag-key"
AllowedLabels = []string{
CapacityTypeLabel,
LaunchTemplateIdLabel,
LaunchTemplateVersionLabel,
SubnetNameLabel,
SubnetTagKeyLabel,
SecurityGroupNameLabel,
SecurityGroupTagKeyLabel,
}
AWSToKubeArchitectures = map[string]string{
"x86_64": v1alpha1.ArchitectureAmd64,
Expand All @@ -66,8 +70,8 @@ func (c *Constraints) GetCapacityType() string {
}

type LaunchTemplate struct {
Id *string
Version *string
Id string
Version string
}

func (c *Constraints) GetLaunchTemplate() *LaunchTemplate {
Expand All @@ -80,25 +84,41 @@ func (c *Constraints) GetLaunchTemplate() *LaunchTemplate {
version = DefaultLaunchTemplateVersion
}
return &LaunchTemplate{
Id: &id,
Version: &version,
Id: id,
Version: version,
}
}

func (c *Constraints) GetSubnetName() *string {
subnetName, ok := c.Labels[SubnetNameLabel]
name, ok := c.Labels[SubnetNameLabel]
if !ok {
return nil
}
return aws.String(subnetName)
return aws.String(name)
}

func (c *Constraints) GetSubnetTagKey() *string {
subnetTag, ok := c.Labels[SubnetTagKeyLabel]
tag, ok := c.Labels[SubnetTagKeyLabel]
if !ok {
return nil
}
return aws.String(subnetTag)
return aws.String(tag)
}

func (c *Constraints) GetSecurityGroupName() *string {
name, ok := c.Labels[SecurityGroupNameLabel]
if !ok {
return nil
}
return aws.String(name)
}

func (c *Constraints) GetSecurityGroupTagKey() *string {
tag, ok := c.Labels[SecurityGroupTagKeyLabel]
if !ok {
return nil
}
return aws.String(tag)
}

func (c *Constraints) Validate(ctx context.Context) (errs *apis.FieldError) {
Expand Down
68 changes: 31 additions & 37 deletions pkg/cloudprovider/aws/fake/ec2api.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (

"github.com/Pallinder/go-randomdata"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
Expand All @@ -29,17 +30,17 @@ import (
// EC2Behavior must be reset between tests otherwise tests will
// pollute each other.
type EC2Behavior struct {
CreateFleetOutput *ec2.CreateFleetOutput
DescribeInstancesOutput *ec2.DescribeInstancesOutput
DescribeLaunchTemplatesOutput *ec2.DescribeLaunchTemplatesOutput
DescribeSubnetsOutput *ec2.DescribeSubnetsOutput
DescribeSecurityGroupsOutput *ec2.DescribeSecurityGroupsOutput
DescribeInstanceTypesOutput *ec2.DescribeInstanceTypesOutput
DescribeInstanceTypeOfferingsOutput *ec2.DescribeInstanceTypeOfferingsOutput
DescribeAvailabilityZonesOutput *ec2.DescribeAvailabilityZonesOutput
WantErr error
CalledWithCreateFleetInput []ec2.CreateFleetInput
CalledWithCreateFleetInput []*ec2.CreateFleetInput
CalledWithCreateLaunchTemplateInput []*ec2.CreateLaunchTemplateInput
Instances []*ec2.Instance
LaunchTemplates []*ec2.LaunchTemplate
}

type EC2API struct {
Expand All @@ -54,13 +55,7 @@ func (e *EC2API) Reset() {
}

func (e *EC2API) CreateFleetWithContext(ctx context.Context, input *ec2.CreateFleetInput, options ...request.Option) (*ec2.CreateFleetOutput, error) {
e.CalledWithCreateFleetInput = append(e.CalledWithCreateFleetInput, *input)
if e.WantErr != nil {
return nil, e.WantErr
}
if e.CreateFleetOutput != nil {
return e.CreateFleetOutput, nil
}
e.CalledWithCreateFleetInput = append(e.CalledWithCreateFleetInput, input)
if input.LaunchTemplateConfigs[0].LaunchTemplateSpecification.LaunchTemplateId == nil &&
input.LaunchTemplateConfigs[0].LaunchTemplateSpecification.LaunchTemplateName == nil {
return nil, fmt.Errorf("missing launch template id or name")
Expand All @@ -69,15 +64,20 @@ func (e *EC2API) CreateFleetWithContext(ctx context.Context, input *ec2.CreateFl
InstanceId: aws.String(randomdata.SillyName()),
Placement: &ec2.Placement{AvailabilityZone: aws.String("test-zone-1a")},
PrivateDnsName: aws.String(fmt.Sprintf("test-instance-%d.example.com", len(e.Instances))),
InstanceType: input.LaunchTemplateConfigs[0].Overrides[0].InstanceType,
}
e.Instances = append(e.Instances, instance)
return &ec2.CreateFleetOutput{Instances: []*ec2.CreateFleetInstance{{InstanceIds: []*string{instance.InstanceId}}}}, nil
}

func (e *EC2API) CreateLaunchTemplateWithContext(ctx context.Context, input *ec2.CreateLaunchTemplateInput, options ...request.Option) (*ec2.CreateLaunchTemplateOutput, error) {
e.CalledWithCreateLaunchTemplateInput = append(e.CalledWithCreateLaunchTemplateInput, input)
launchTemplate := &ec2.LaunchTemplate{LaunchTemplateName: input.LaunchTemplateName, LaunchTemplateId: aws.String("test-launch-template-id")}
e.LaunchTemplates = append(e.LaunchTemplates, launchTemplate)
return &ec2.CreateLaunchTemplateOutput{LaunchTemplate: launchTemplate}, nil
}

func (e *EC2API) DescribeInstancesWithContext(context.Context, *ec2.DescribeInstancesInput, ...request.Option) (*ec2.DescribeInstancesOutput, error) {
if e.WantErr != nil {
return nil, e.WantErr
}
if e.DescribeInstancesOutput != nil {
return e.DescribeInstancesOutput, nil
}
Expand All @@ -86,23 +86,25 @@ func (e *EC2API) DescribeInstancesWithContext(context.Context, *ec2.DescribeInst
}, nil
}

func (e *EC2API) DescribeLaunchTemplatesWithContext(context.Context, *ec2.DescribeLaunchTemplatesInput, ...request.Option) (*ec2.DescribeLaunchTemplatesOutput, error) {
if e.WantErr != nil {
return nil, e.WantErr
}
func (e *EC2API) DescribeLaunchTemplatesWithContext(ctx context.Context, input *ec2.DescribeLaunchTemplatesInput, options ...request.Option) (*ec2.DescribeLaunchTemplatesOutput, error) {
if e.DescribeLaunchTemplatesOutput != nil {
return e.DescribeLaunchTemplatesOutput, nil
}
return &ec2.DescribeLaunchTemplatesOutput{LaunchTemplates: []*ec2.LaunchTemplate{{
LaunchTemplateName: aws.String("test-launch-template-name"),
LaunchTemplateId: aws.String("test-launch-template-id"),
}}}, nil
output := &ec2.DescribeLaunchTemplatesOutput{}
for _, wanted := range input.LaunchTemplateNames {
for _, launchTemplate := range e.LaunchTemplates {
if launchTemplate.LaunchTemplateName == wanted {
output.LaunchTemplates = append(output.LaunchTemplates, launchTemplate)
}
}
}
if len(output.LaunchTemplates) == 0 {
return nil, awserr.New("InvalidLaunchTemplateName.NotFoundException", "not found", nil)
}
return output, nil
}

func (e *EC2API) DescribeSubnetsWithContext(context.Context, *ec2.DescribeSubnetsInput, ...request.Option) (*ec2.DescribeSubnetsOutput, error) {
if e.WantErr != nil {
return nil, e.WantErr
}
if e.DescribeSubnetsOutput != nil {
return e.DescribeSubnetsOutput, nil
}
Expand All @@ -117,19 +119,17 @@ func (e *EC2API) DescribeSubnetsWithContext(context.Context, *ec2.DescribeSubnet
}

func (e *EC2API) DescribeSecurityGroupsWithContext(context.Context, *ec2.DescribeSecurityGroupsInput, ...request.Option) (*ec2.DescribeSecurityGroupsOutput, error) {
if e.WantErr != nil {
return nil, e.WantErr
}
if e.DescribeSecurityGroupsOutput != nil {
return e.DescribeSecurityGroupsOutput, nil
}
return &ec2.DescribeSecurityGroupsOutput{SecurityGroups: []*ec2.SecurityGroup{{GroupId: aws.String("test-group")}}}, nil
return &ec2.DescribeSecurityGroupsOutput{SecurityGroups: []*ec2.SecurityGroup{
{GroupId: aws.String("test-security-group-1"), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-security-group-1")}}},
{GroupId: aws.String("test-security-group-2"), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-security-group-2")}}},
{GroupId: aws.String("test-security-group-3"), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-security-group-3")}, {Key: aws.String("TestTag")}}},
}}, nil
}

func (e *EC2API) DescribeAvailabilityZonesWithContext(context.Context, *ec2.DescribeAvailabilityZonesInput, ...request.Option) (*ec2.DescribeAvailabilityZonesOutput, error) {
if e.WantErr != nil {
return nil, e.WantErr
}
if e.DescribeAvailabilityZonesOutput != nil {
return e.DescribeAvailabilityZonesOutput, nil
}
Expand All @@ -141,9 +141,6 @@ func (e *EC2API) DescribeAvailabilityZonesWithContext(context.Context, *ec2.Desc
}

func (e *EC2API) DescribeInstanceTypesPagesWithContext(ctx context.Context, input *ec2.DescribeInstanceTypesInput, fn func(*ec2.DescribeInstanceTypesOutput, bool) bool, opts ...request.Option) error {
if e.WantErr != nil {
return e.WantErr
}
if e.DescribeInstanceTypesOutput != nil {
fn(e.DescribeInstanceTypesOutput, false)
return nil
Expand Down Expand Up @@ -267,9 +264,6 @@ func (e *EC2API) DescribeInstanceTypesPagesWithContext(ctx context.Context, inpu
}

func (e *EC2API) DescribeInstanceTypeOfferingsPagesWithContext(ctx context.Context, input *ec2.DescribeInstanceTypeOfferingsInput, fn func(*ec2.DescribeInstanceTypeOfferingsOutput, bool) bool, opts ...request.Option) error {
if e.WantErr != nil {
return e.WantErr
}
if e.DescribeInstanceTypeOfferingsOutput != nil {
fn(e.DescribeInstanceTypeOfferingsOutput, false)
return nil
Expand Down
35 changes: 0 additions & 35 deletions pkg/cloudprovider/aws/fake/sqsqueue.go

This file was deleted.

5 changes: 2 additions & 3 deletions pkg/cloudprovider/aws/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ func (p *InstanceProvider) Create(ctx context.Context,
},
LaunchTemplateConfigs: []*ec2.FleetLaunchTemplateConfigRequest{{
LaunchTemplateSpecification: &ec2.FleetLaunchTemplateSpecificationRequest{
LaunchTemplateId: launchTemplate.Id,
Version: launchTemplate.Version,
LaunchTemplateId: aws.String(launchTemplate.Id),
Version: aws.String(launchTemplate.Version),
},
Overrides: overrides,
}},
Expand All @@ -102,7 +102,6 @@ func (p *InstanceProvider) Create(ctx context.Context,
if count := len(createFleetOutput.Instances[0].InstanceIds); count != 1 {
return nil, fmt.Errorf("expected 1 instance ids, but got %d due to errors %v", count, createFleetOutput.Errors)
}
// TODO aggregate errors
if count := len(createFleetOutput.Errors); count > 0 {
zap.S().Warnf("CreateFleet encountered %d errors, but still launched instances, %v", count, createFleetOutput.Errors)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cloudprovider/aws/instancetypes.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func (p *InstanceTypeProvider) Get(ctx context.Context) ([]cloudprovider.Instanc
return nil, err
}
p.cache.SetDefault(allInstanceTypesKey, instanceTypes)
zap.S().Debugf("Successfully discovered %d EC2 instance types", len(instanceTypes))
zap.S().Debugf("Discovered %d EC2 instance types", len(instanceTypes))
}
return instanceTypes, nil
}
Expand Down
Loading