support arm, remove instance type fallbacks, and fail on errors for dynamic instance type retrieval
bwagner5 committed Mar 15, 2021
1 parent db6be0d commit 7e62b95
Showing 2 changed files with 64 additions and 115 deletions.
92 changes: 7 additions & 85 deletions pkg/cloudprovider/aws/packing/nodecapacity.go
@@ -23,106 +23,28 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
)

-// TODO get this information from node-instance-selector
-var (
-    fallbackNodeCapacities = []*nodeCapacity{
-
-        {
-            instanceType: "m5.24xlarge",
-            total: v1.ResourceList{
-                v1.ResourceCPU:    resource.MustParse("96000m"),
-                v1.ResourceMemory: resource.MustParse("384Gi"),
-                v1.ResourcePods:   resource.MustParse("737"),
-            },
-            reserved: v1.ResourceList{
-                v1.ResourceCPU:    resource.Quantity{},
-                v1.ResourceMemory: resource.Quantity{},
-            },
-        },
-        {
-            instanceType: "m5.8xlarge",
-            total: v1.ResourceList{
-                v1.ResourceCPU:    resource.MustParse("32000m"),
-                v1.ResourceMemory: resource.MustParse("128Gi"),
-                v1.ResourcePods:   resource.MustParse("234"),
-            },
-            reserved: v1.ResourceList{
-                v1.ResourceCPU:    resource.Quantity{},
-                v1.ResourceMemory: resource.Quantity{},
-            },
-        },
-        {
-            instanceType: "m5.2xlarge",
-            total: v1.ResourceList{
-                v1.ResourceCPU:    resource.MustParse("8000m"),
-                v1.ResourceMemory: resource.MustParse("32Gi"),
-                v1.ResourcePods:   resource.MustParse("58"),
-            },
-            reserved: v1.ResourceList{
-                v1.ResourceCPU:    resource.Quantity{},
-                v1.ResourceMemory: resource.Quantity{},
-            },
-        },
-        {
-            instanceType: "m5.xlarge",
-            total: v1.ResourceList{
-                v1.ResourceCPU:    resource.MustParse("4000m"),
-                v1.ResourceMemory: resource.MustParse("16Gi"),
-                v1.ResourcePods:   resource.MustParse("58"),
-            },
-            reserved: v1.ResourceList{
-                v1.ResourceCPU:    resource.Quantity{},
-                v1.ResourceMemory: resource.Quantity{},
-            },
-        },
-        {
-            instanceType: "m5.large",
-            total: v1.ResourceList{
-                v1.ResourceCPU:    resource.MustParse("2000m"),
-                v1.ResourceMemory: resource.MustParse("8Gi"),
-                v1.ResourcePods:   resource.MustParse("29"),
-            },
-            reserved: v1.ResourceList{
-                v1.ResourceCPU:    resource.Quantity{},
-                v1.ResourceMemory: resource.Quantity{},
-            },
-        },
-    }
-)
-
-func instanceTypeInfoToNodeCapacity(instanceTypeInfo ec2.InstanceTypeInfo) (*nodeCapacity, error) {
+func instanceTypeInfoToNodeCapacity(instanceTypeInfo ec2.InstanceTypeInfo) *nodeCapacity {
instanceTypeName := *instanceTypeInfo.InstanceType
-    vcpusInMillicores := fmt.Sprintf("%dm", *instanceTypeInfo.VCpuInfo.DefaultVCpus*1000)
-    vcpusResource, err := resource.ParseQuantity(vcpusInMillicores)
-    if err != nil {
-        return nil, fmt.Errorf("parsing %s millicores resource quantity \"%s\" from instanceTypeInfo, %w", instanceTypeName, vcpusInMillicores, err)
-    }
-    memory := fmt.Sprintf("%dMi", *instanceTypeInfo.MemoryInfo.SizeInMiB)
-    memoryResource, err := resource.ParseQuantity(memory)
-    if err != nil {
-        return nil, fmt.Errorf("parsing %s memory resource quantity \"%s\", %w", instanceTypeName, memory, err)
-    }
+    vcpusInMillicores := resource.MustParse(fmt.Sprintf("%dm", *instanceTypeInfo.VCpuInfo.DefaultVCpus*1000))
+    memory := resource.MustParse(fmt.Sprintf("%dMi", *instanceTypeInfo.MemoryInfo.SizeInMiB))
// The number of pods per node is calculated using the formula:
// max number of ENIs * (IPv4 Addresses per ENI -1) + 2
// https://github.com/awslabs/amazon-eks-ami/blob/master/files/eni-max-pods.txt#L20
podCapacity := *instanceTypeInfo.NetworkInfo.MaximumNetworkInterfaces*(*instanceTypeInfo.NetworkInfo.Ipv4AddressesPerInterface-1) + 2
-    podCapacityResource, err := resource.ParseQuantity(fmt.Sprint(podCapacity))
-    if err != nil {
-        return nil, fmt.Errorf("parsing %s pod capacity resource quantity \"%d\", %w", instanceTypeName, podCapacity, err)
-    }
+    podCapacityResource := resource.MustParse(fmt.Sprint(podCapacity))

return &nodeCapacity{
instanceType: instanceTypeName,
total: v1.ResourceList{
-            v1.ResourceCPU:    vcpusResource,
-            v1.ResourceMemory: memoryResource,
+            v1.ResourceCPU:    vcpusInMillicores,
+            v1.ResourceMemory: memory,
v1.ResourcePods: podCapacityResource,
},
reserved: v1.ResourceList{
v1.ResourceCPU: resource.Quantity{},
v1.ResourceMemory: resource.Quantity{},
},
-    }, nil
+    }
}

type nodeCapacity struct {
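As a quick illustration of the pod-capacity formula used in instanceTypeInfoToNodeCapacity above, here is a small standalone Go sketch (not part of this commit); the ENI counts and IPv4-addresses-per-ENI figures are assumptions taken from the EKS eni-max-pods data, and the results line up with the removed fallback table:

package main

import "fmt"

// maxPods mirrors the calculation in instanceTypeInfoToNodeCapacity:
// max number of ENIs * (IPv4 addresses per ENI - 1) + 2
func maxPods(maxENIs, ipv4PerENI int64) int64 {
    return maxENIs*(ipv4PerENI-1) + 2
}

func main() {
    fmt.Println("m5.large:", maxPods(3, 10))  // 3*(10-1)+2 = 29, matching the removed fallback entry
    fmt.Println("m5.xlarge:", maxPods(4, 15)) // 4*(15-1)+2 = 58, matching the removed fallback entry
}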
87 changes: 57 additions & 30 deletions pkg/cloudprovider/aws/packing/packing.go
@@ -16,11 +16,13 @@ package packing

import (
"context"
"fmt"
"sort"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha1"
"github.com/awslabs/karpenter/pkg/cloudprovider"
"github.com/awslabs/karpenter/pkg/utils/binpacking"
"github.com/awslabs/karpenter/pkg/utils/resources"
Expand Down Expand Up @@ -69,9 +71,13 @@ func (p *podPacker) Pack(ctx context.Context, constraints *cloudprovider.Constra
sort.Sort(sort.Reverse(binpacking.ByResourcesRequested{SortablePods: constraints.Pods}))
packings := []*Packing{}
var packing *Packing
+    var err error
remainingPods := constraints.Pods
for len(remainingPods) > 0 {
-        packing, remainingPods = p.packWithLargestPod(remainingPods, constraints)
+        packing, remainingPods, err = p.packWithLargestPod(remainingPods, constraints)
+        if err != nil {
+            return packings, err
+        }
// checked all instance type and found no packing option
if len(packing.Pods) == 0 {
zap.S().Warnf("Failed to find instance type for pod %s/%s ", remainingPods[0].Namespace, remainingPods[0].Name)
@@ -84,55 +90,76 @@ func (p *podPacker) Pack(ctx context.Context, constraints *cloudprovider.Constra
return packings, nil
}

-func (p *podPacker) getNodeCapacities(constraints *cloudprovider.Constraints) []*nodeCapacity {
+func (p *podPacker) getNodeCapacities(constraints *cloudprovider.Constraints) ([]*nodeCapacity, error) {
result := make([]*nodeCapacity, 0)

describeInstanceTypesInput := &ec2.DescribeInstanceTypesInput{
-        Filters: []*ec2.Filter{
-            {
-                Name:   aws.String("processor-info.supported-architecture"),
-                Values: []*string{aws.String("x86_64")},
-            },
-            {
-                Name:   aws.String("supported-usage-class"),
-                Values: []*string{aws.String("on-demand")},
-            },
-            {
-                Name:   aws.String("supported-virtualization-type"),
-                Values: []*string{aws.String("hvm")},
-            },
-        },
+        Filters: constraintsToDescribeInstanceTypesFilters(constraints),
}

err := p.ec2.DescribeInstanceTypesPagesWithContext(context.TODO(), describeInstanceTypesInput, func(page *ec2.DescribeInstanceTypesOutput, lastPage bool) bool {
for _, instanceTypeInfo := range page.InstanceTypes {
-            nc, err := instanceTypeInfoToNodeCapacity(*instanceTypeInfo)
-            if err != nil {
-                zap.S().Warnf("Failed to convert instanceTypeInfo to a nodeCapacity, %s", err.Error())
-                continue
-            }
-            ncc := nc.Copy()
-            kubeletOverhead := binpacking.CalculateKubeletOverhead(ncc.total)
-            if ok := ncc.reserve(resources.Merge(constraints.Overhead, kubeletOverhead)); !ok {
-                zap.S().Errorf("Failed to reserve kubelet overhead for node capacity type %v", ncc.instanceType)
+            nc := instanceTypeInfoToNodeCapacity(*instanceTypeInfo)
+            kubeletOverhead := binpacking.CalculateKubeletOverhead(nc.total)
+            if ok := nc.reserve(resources.Merge(constraints.Overhead, kubeletOverhead)); !ok {
+                zap.S().Errorf("Failed to reserve kubelet overhead for node capacity type %v", nc.instanceType)
}
result = append(result, nc)
}
return lastPage
})

if err != nil {
zap.S().Warnf("Failed to fetch instance types using ec2.DescribeInstanceTypes, %s", err.Error())
return nil, fmt.Errorf("fetching instance types using ec2.DescribeInstanceTypes, %w", err)
}
-    return result
+    return result, nil
}

+func constraintsToDescribeInstanceTypesFilters(constraints *cloudprovider.Constraints) []*ec2.Filter {
+    architecture := "x86_64"
+    if constraints.Architecture != nil && *constraints.Architecture == v1alpha1.ArchitectureArm64 {
+        architecture = string(*constraints.Architecture)
+    }
+
+    filters := []*ec2.Filter{
+        {
+            Name:   aws.String("processor-info.supported-architecture"),
+            Values: []*string{&architecture},
+        },
+        {
+            Name:   aws.String("supported-usage-class"),
+            Values: []*string{aws.String("on-demand")},
+        },
+        {
+            Name:   aws.String("supported-virtualization-type"),
+            Values: []*string{aws.String("hvm")},
+        },
+    }
+
+    instanceTypeConstraints := make([]*string, 0)
+    instanceTypesFilter := &ec2.Filter{
+        Name:   aws.String("instance-type"),
+        Values: instanceTypeConstraints,
+    }
+    for _, instanceType := range constraints.InstanceTypes {
+        instanceTypesFilter.Values = append(instanceTypesFilter.Values, aws.String(instanceType))
+    }
+    filters = append(filters, instanceTypesFilter)
+    return filters
+}
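// For a sense of what the helper above produces, here is a standalone sketch
// (not part of this commit) that builds an equivalent DescribeInstanceTypes
// input for an arm64 constraint with one requested instance type; the value
// "m6g.large" is an arbitrary illustrative choice.

package main

import (
    "fmt"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/service/ec2"
)

func main() {
    filters := []*ec2.Filter{
        {Name: aws.String("processor-info.supported-architecture"), Values: []*string{aws.String("arm64")}},
        {Name: aws.String("supported-usage-class"), Values: []*string{aws.String("on-demand")}},
        {Name: aws.String("supported-virtualization-type"), Values: []*string{aws.String("hvm")}},
        {Name: aws.String("instance-type"), Values: []*string{aws.String("m6g.large")}},
    }
    // Print the request that DescribeInstanceTypesPagesWithContext would receive.
    fmt.Println(&ec2.DescribeInstanceTypesInput{Filters: filters})
}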

// packWithLargestPod will try to pack max number of pods with largest pod in
// pods across all available node capacities. It returns Packing: max pod count
// that fit; with their node capacities and list of leftover pods
-func (p *podPacker) packWithLargestPod(unpackedPods []*v1.Pod, constraints *cloudprovider.Constraints) (*Packing, []*v1.Pod) {
+func (p *podPacker) packWithLargestPod(unpackedPods []*v1.Pod, constraints *cloudprovider.Constraints) (*Packing, []*v1.Pod, error) {
bestPackedPods := []*v1.Pod{}
bestCapacities := []*nodeCapacity{}
remainingPods := unpackedPods
-    for _, nc := range p.getNodeCapacities(constraints) {
+    nodeCapacities, err := p.getNodeCapacities(constraints)
+    if err != nil {
+        return nil, bestPackedPods, err
+    }
+    for _, nc := range nodeCapacities {
// check how many pods we can fit with the available capacity
result := p.packPodsForCapacity(nc, unpackedPods)
if len(result.packed) == 0 {
Expand All @@ -154,7 +181,7 @@ func (p *podPacker) packWithLargestPod(unpackedPods []*v1.Pod, constraints *clou
for _, capacity := range bestCapacities {
capacityNames = append(capacityNames, capacity.instanceType)
}
-    return &Packing{Pods: bestPackedPods, InstanceTypes: capacityNames}, remainingPods
+    return &Packing{Pods: bestPackedPods, InstanceTypes: capacityNames}, remainingPods, nil
}

func (p *podPacker) packPodsForCapacity(capacity *nodeCapacity, pods []*v1.Pod) *packingResult {
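To make the greedy strategy described in the packWithLargestPod comment concrete, here is a minimal, self-contained sketch of the outer packing loop (not part of this commit); the types are deliberately simplified to track only CPU, and pods are assumed to be pre-sorted largest-first, as Pack does with ByResourcesRequested:

package main

import "fmt"

// pod and capacity are simplified stand-ins for the real packer types.
type pod struct {
    name string
    cpu  int64 // requested millicores
}

type capacity struct {
    instanceType string
    cpu          int64 // allocatable millicores
}

// packOnce greedily fits pods (assumed sorted largest-first) into a single
// capacity and returns the packed pods plus the leftovers.
func packOnce(c capacity, pods []pod) (packed, remaining []pod) {
    free := c.cpu
    for _, p := range pods {
        if p.cpu <= free {
            free -= p.cpu
            packed = append(packed, p)
        } else {
            remaining = append(remaining, p)
        }
    }
    return packed, remaining
}

func main() {
    caps := []capacity{{"m5.large", 2000}, {"m5.xlarge", 4000}}
    pods := []pod{{"a", 1500}, {"b", 1000}, {"c", 500}}
    // Mirror the outer loop in Pack: keep packing until no pods remain,
    // keeping the capacity that fits the most pods each round.
    for len(pods) > 0 {
        best, bestRemaining, bestType := []pod{}, pods, ""
        for _, c := range caps {
            packed, remaining := packOnce(c, pods)
            if len(packed) > len(best) {
                best, bestRemaining, bestType = packed, remaining, c.instanceType
            }
        }
        if len(best) == 0 {
            fmt.Println("no instance type fits pod", pods[0].name)
            break
        }
        fmt.Printf("packed %d pod(s) on %s\n", len(best), bestType)
        pods = bestRemaining
    }
}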
