From eb0123be02d1f5e4c5bb8abe013a6da711c88952 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Thu, 24 Feb 2022 16:39:43 -0600 Subject: [PATCH] [AWS] Favor subnets w/ more IP addresses when launching instances (#1413) * always use the bigger subnet * pr comments --- pkg/cloudprovider/aws/fake/ec2api.go | 6 +-- pkg/cloudprovider/aws/instance.go | 46 +++++++++++-------- pkg/cloudprovider/aws/suite_test.go | 21 ++++++++- .../content/en/preview/AWS/provisioning.md | 2 +- 4 files changed, 50 insertions(+), 25 deletions(-) diff --git a/pkg/cloudprovider/aws/fake/ec2api.go b/pkg/cloudprovider/aws/fake/ec2api.go index 6b17bf9e60a2..071d279d1e57 100644 --- a/pkg/cloudprovider/aws/fake/ec2api.go +++ b/pkg/cloudprovider/aws/fake/ec2api.go @@ -180,11 +180,11 @@ func (e *EC2API) DescribeSubnetsWithContext(context.Context, *ec2.DescribeSubnet return e.DescribeSubnetsOutput, nil } return &ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{ - {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), + {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(100), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-1")}}}, - {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1b"), + {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1b"), AvailableIpAddressCount: aws.Int64(100), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, - {SubnetId: aws.String("test-subnet-3"), AvailabilityZone: aws.String("test-zone-1c"), + {SubnetId: aws.String("test-subnet-3"), AvailabilityZone: aws.String("test-zone-1c"), AvailableIpAddressCount: aws.Int64(100), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-3")}, {Key: aws.String("TestTag")}}}, }}, nil } diff --git a/pkg/cloudprovider/aws/instance.go b/pkg/cloudprovider/aws/instance.go index f5af5cdd1cca..c9a1fc0a4c24 100644 --- a/pkg/cloudprovider/aws/instance.go +++ b/pkg/cloudprovider/aws/instance.go @@ -17,6 +17,7 @@ package aws import ( "context" "fmt" + "sort" "strings" "time" @@ -180,6 +181,14 @@ func (p *InstanceProvider) getLaunchTemplateConfigs(ctx context.Context, constra // getOverrides creates and returns launch template overrides for the cross product of instanceTypeOptions and subnets (with subnets being constrained by // zones and the offerings in instanceTypeOptions) func (p *InstanceProvider) getOverrides(instanceTypeOptions []cloudprovider.InstanceType, subnets []*ec2.Subnet, zones sets.String, capacityType string) []*ec2.FleetLaunchTemplateOverridesRequest { + // sort subnets in ascending order of available IP addresses and populate map with most available subnet per AZ + zonalSubnets := map[string]*ec2.Subnet{} + sort.Slice(subnets, func(i, j int) bool { + return aws.Int64Value(subnets[i].AvailableIpAddressCount) < aws.Int64Value(subnets[j].AvailableIpAddressCount) + }) + for _, subnet := range subnets { + zonalSubnets[*subnet.AvailabilityZone] = subnet + } var overrides []*ec2.FleetLaunchTemplateOverridesRequest for i, instanceType := range instanceTypeOptions { for _, offering := range instanceType.Offerings() { @@ -189,27 +198,24 @@ func (p *InstanceProvider) getOverrides(instanceTypeOptions []cloudprovider.Inst if !zones.Has(offering.Zone) { continue } - for _, subnet := range subnets { - if aws.StringValue(subnet.AvailabilityZone) != offering.Zone { - continue - } - override := &ec2.FleetLaunchTemplateOverridesRequest{ - InstanceType: aws.String(instanceType.Name()), - SubnetId: subnet.SubnetId, - // This is technically redundant, but is useful if we have to parse insufficient capacity errors from - // CreateFleet so that we can figure out the zone rather than additional API calls to look up the subnet - AvailabilityZone: subnet.AvailabilityZone, - } - // Add a priority for spot requests since we are using the capacity-optimized-prioritized spot allocation strategy - // to reduce the likelihood of getting an excessively large instance type. - // instanceTypeOptions are sorted by vcpus and memory so this prioritizes smaller instance types. - if capacityType == v1alpha1.CapacityTypeSpot { - override.Priority = aws.Float64(float64(i)) - } - overrides = append(overrides, override) - // FleetAPI cannot span subnets from the same AZ, so break after the first one. - break + subnet, ok := zonalSubnets[offering.Zone] + if !ok { + continue + } + override := &ec2.FleetLaunchTemplateOverridesRequest{ + InstanceType: aws.String(instanceType.Name()), + SubnetId: subnet.SubnetId, + // This is technically redundant, but is useful if we have to parse insufficient capacity errors from + // CreateFleet so that we can figure out the zone rather than additional API calls to look up the subnet + AvailabilityZone: subnet.AvailabilityZone, + } + // Add a priority for spot requests since we are using the capacity-optimized-prioritized spot allocation strategy + // to reduce the likelihood of getting an excessively large instance type. + // instanceTypeOptions are sorted by vcpus and memory so this prioritizes smaller instance types. + if capacityType == v1alpha1.CapacityTypeSpot { + override.Priority = aws.Float64(float64(i)) } + overrides = append(overrides, override) } } return overrides diff --git a/pkg/cloudprovider/aws/suite_test.go b/pkg/cloudprovider/aws/suite_test.go index 5ff42b4c40bb..da5a81cae738 100644 --- a/pkg/cloudprovider/aws/suite_test.go +++ b/pkg/cloudprovider/aws/suite_test.go @@ -55,6 +55,7 @@ var opts options.Options var env *test.Environment var launchTemplateCache *cache.Cache var securityGroupCache *cache.Cache +var subnetCache *cache.Cache var unavailableOfferingsCache *cache.Cache var fakeEC2API *fake.EC2API var provisioners *provisioning.Controller @@ -80,8 +81,12 @@ var _ = BeforeSuite(func() { launchTemplateCache = cache.New(CacheTTL, CacheCleanupInterval) unavailableOfferingsCache = cache.New(InsufficientCapacityErrorCacheTTL, InsufficientCapacityErrorCacheCleanupInterval) securityGroupCache = cache.New(CacheTTL, CacheCleanupInterval) + subnetCache = cache.New(CacheTTL, CacheCleanupInterval) fakeEC2API = &fake.EC2API{} - subnetProvider := NewSubnetProvider(fakeEC2API) + subnetProvider := &SubnetProvider{ + ec2api: fakeEC2API, + cache: subnetCache, + } instanceTypeProvider := &InstanceTypeProvider{ ec2api: fakeEC2API, subnetProvider: subnetProvider, @@ -131,6 +136,7 @@ var _ = Describe("Allocation", func() { fakeEC2API.Reset() launchTemplateCache.Flush() securityGroupCache.Flush() + subnetCache.Flush() unavailableOfferingsCache.Flush() }) @@ -448,6 +454,19 @@ var _ = Describe("Allocation", func() { &ec2.FleetLaunchTemplateOverridesRequest{SubnetId: aws.String("test-subnet-3"), InstanceType: aws.String("m5.large"), AvailabilityZone: aws.String("test-zone-1c")}, )) }) + It("should launch instances into subnet with the most available IP addresses", func() { + fakeEC2API.DescribeSubnetsOutput = &ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{ + {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(10), + Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-1")}}}, + {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(100), + Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, + }} + + pod := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner, test.UnschedulablePod(test.PodOptions{NodeSelector: map[string]string{v1.LabelTopologyZone: "test-zone-1a"}}))[0] + ExpectScheduled(ctx, env.Client, pod) + createFleetInput := fakeEC2API.CalledWithCreateFleetInput.Pop().(*ec2.CreateFleetInput) + Expect(aws.StringValue(createFleetInput.LaunchTemplateConfigs[0].Overrides[0].SubnetId)).To(Equal("test-subnet-2")) + }) }) Context("Security Groups", func() { It("should default to the clusters security groups", func() { diff --git a/website/content/en/preview/AWS/provisioning.md b/website/content/en/preview/AWS/provisioning.md index 1e9e4953e989..ba64f8ebac36 100644 --- a/website/content/en/preview/AWS/provisioning.md +++ b/website/content/en/preview/AWS/provisioning.md @@ -41,7 +41,7 @@ Karpenter discovers subnets using [AWS tags](https://docs.aws.amazon.com/AWSEC2/ Subnets may be specified by any AWS tag, including `Name`. Selecting tag values using wildcards ("\*") is supported. -When launching nodes, Karpenter automatically chooses a subnet that matches the desired zone. If multiple subnets exist for a zone, one is chosen randomly. +When launching nodes, Karpenter automatically chooses a subnet that matches the desired zone. If multiple subnets exist for a zone, the one with the most available IP addresses will be used. **Examples**