Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pod ENI #924

Merged
merged 1 commit into from
Dec 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ go 1.17
require (
github.com/Pallinder/go-randomdata v1.2.0
github.com/avast/retry-go v2.7.0+incompatible
github.com/aws/aws-sdk-go v1.38.69
github.com/aws/amazon-vpc-resource-controller-k8s v1.1.0
github.com/aws/aws-sdk-go v1.40.43
github.com/deckarep/golang-set v1.7.1
github.com/go-logr/zapr v0.4.0
github.com/imdario/mergo v0.3.12
Expand Down
195 changes: 193 additions & 2 deletions go.sum

Large diffs are not rendered by default.

35 changes: 35 additions & 0 deletions pkg/cloudprovider/aws/fake/ec2api.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,12 +218,34 @@ func (e *EC2API) DescribeInstanceTypesPagesWithContext(_ context.Context, _ *ec2
}
fn(&ec2.DescribeInstanceTypesOutput{
InstanceTypes: []*ec2.InstanceTypeInfo{
{
InstanceType: aws.String("t3.large"),
SupportedUsageClasses: DefaultSupportedUsageClasses,
SupportedVirtualizationTypes: []*string{aws.String("hvm")},
BurstablePerformanceSupported: aws.Bool(true),
BareMetal: aws.Bool(false),
Hypervisor: aws.String("nitro"),
ProcessorInfo: &ec2.ProcessorInfo{
SupportedArchitectures: aws.StringSlice([]string{"x86_64"}),
},
VCpuInfo: &ec2.VCpuInfo{
DefaultVCpus: aws.Int64(2),
},
MemoryInfo: &ec2.MemoryInfo{
SizeInMiB: aws.Int64(8 * 1024),
},
NetworkInfo: &ec2.NetworkInfo{
MaximumNetworkInterfaces: aws.Int64(3),
Ipv4AddressesPerInterface: aws.Int64(12),
},
},
{
InstanceType: aws.String("m5.large"),
SupportedUsageClasses: DefaultSupportedUsageClasses,
SupportedVirtualizationTypes: []*string{aws.String("hvm")},
BurstablePerformanceSupported: aws.Bool(false),
BareMetal: aws.Bool(false),
Hypervisor: aws.String("nitro"),
ProcessorInfo: &ec2.ProcessorInfo{
SupportedArchitectures: aws.StringSlice([]string{"x86_64"}),
},
Expand All @@ -244,6 +266,7 @@ func (e *EC2API) DescribeInstanceTypesPagesWithContext(_ context.Context, _ *ec2
SupportedVirtualizationTypes: []*string{aws.String("hvm")},
BurstablePerformanceSupported: aws.Bool(false),
BareMetal: aws.Bool(false),
Hypervisor: aws.String("nitro"),
ProcessorInfo: &ec2.ProcessorInfo{
SupportedArchitectures: aws.StringSlice([]string{"x86_64"}),
},
Expand All @@ -264,6 +287,7 @@ func (e *EC2API) DescribeInstanceTypesPagesWithContext(_ context.Context, _ *ec2
SupportedVirtualizationTypes: []*string{aws.String("hvm")},
BurstablePerformanceSupported: aws.Bool(false),
BareMetal: aws.Bool(false),
Hypervisor: aws.String("nitro"),
ProcessorInfo: &ec2.ProcessorInfo{
SupportedArchitectures: aws.StringSlice([]string{"x86_64"}),
},
Expand All @@ -290,6 +314,7 @@ func (e *EC2API) DescribeInstanceTypesPagesWithContext(_ context.Context, _ *ec2
SupportedVirtualizationTypes: []*string{aws.String("hvm")},
BurstablePerformanceSupported: aws.Bool(false),
BareMetal: aws.Bool(false),
Hypervisor: aws.String("nitro"),
ProcessorInfo: &ec2.ProcessorInfo{
SupportedArchitectures: aws.StringSlice([]string{v1alpha5.ArchitectureArm64}),
},
Expand All @@ -310,6 +335,7 @@ func (e *EC2API) DescribeInstanceTypesPagesWithContext(_ context.Context, _ *ec2
SupportedVirtualizationTypes: []*string{aws.String("hvm")},
BurstablePerformanceSupported: aws.Bool(false),
BareMetal: aws.Bool(false),
Hypervisor: aws.String("nitro"),
ProcessorInfo: &ec2.ProcessorInfo{
SupportedArchitectures: aws.StringSlice([]string{"x86_64"}),
},
Expand All @@ -335,6 +361,7 @@ func (e *EC2API) DescribeInstanceTypesPagesWithContext(_ context.Context, _ *ec2
SupportedVirtualizationTypes: []*string{aws.String("hvm")},
BurstablePerformanceSupported: aws.Bool(false),
BareMetal: aws.Bool(false),
Hypervisor: aws.String("nitro"),
ProcessorInfo: &ec2.ProcessorInfo{
SupportedArchitectures: aws.StringSlice([]string{"x86_64"}),
},
Expand Down Expand Up @@ -402,6 +429,14 @@ func (e *EC2API) DescribeInstanceTypeOfferingsPagesWithContext(_ context.Context
InstanceType: aws.String("p3.8xlarge"),
Location: aws.String("test-zone-1b"),
},
{
InstanceType: aws.String("t3.large"),
Location: aws.String("test-zone-1a"),
},
{
InstanceType: aws.String("t3.large"),
Location: aws.String("test-zone-1b"),
},
{
InstanceType: aws.String("inf1.2xlarge"),
Location: aws.String("test-zone-1a"),
Expand Down
10 changes: 10 additions & 0 deletions pkg/cloudprovider/aws/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package aws
import (
"fmt"

"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/vpc"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/karpenter/pkg/cloudprovider"
Expand Down Expand Up @@ -75,6 +76,15 @@ func (i *InstanceType) Pods() *resource.Quantity {
return resources.Quantity(fmt.Sprint(*i.NetworkInfo.MaximumNetworkInterfaces*(*i.NetworkInfo.Ipv4AddressesPerInterface-1) + 2))
}

// AWSPodENI returns the number of branch network interfaces recorded for this
// instance type in vpc.Limits. It returns a zero quantity when the instance
// type has no entry in vpc.Limits or its entry is not marked as trunking
// compatible.
func (i *InstanceType) AWSPodENI() *resource.Quantity {
	// https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html#supported-instance-types
	entry, known := vpc.Limits[aws.StringValue(i.InstanceType)]
	if !known || !entry.IsTrunkingCompatible {
		return resources.Quantity("0")
	}
	return resources.Quantity(fmt.Sprint(entry.BranchInterface))
}

func (i *InstanceType) NvidiaGPUs() *resource.Quantity {
count := int64(0)
if i.GpuInfo != nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/cloudprovider/aws/launchtemplate.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ func (p *LaunchTemplateProvider) ensureLaunchTemplate(ctx context.Context, optio
}

// needsDocker returns true if the instance type is unable to use
// conatinerd directly
// containerd directly
func needsDocker(is []cloudprovider.InstanceType) bool {
for _, i := range is {
if !i.AWSNeurons().IsZero() || !i.NvidiaGPUs().IsZero() {
Expand Down
32 changes: 32 additions & 0 deletions pkg/cloudprovider/aws/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"testing"

"github.com/Pallinder/go-randomdata"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/vpc"
"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"
"github.com/aws/karpenter/pkg/cloudprovider/aws/apis/v1alpha1"
"github.com/aws/karpenter/pkg/cloudprovider/aws/fake"
Expand Down Expand Up @@ -121,6 +122,37 @@ var _ = Describe("Allocation", func() {

Context("Reconciliation", func() {
Context("Specialized Hardware", func() {
// A pod pinned to t3.large via node selector that requests one pod-eni
// resource is expected to remain unscheduled.
// NOTE(review): presumably t3.large is absent from vpc.Limits or is not
// trunking compatible there — confirm against the vendored limits table.
It("should not launch AWS Pod ENI on a t3", func() {
for _, pod := range ExpectProvisioned(ctx, env.Client, scheduler, provisioners, provisioner,
test.UnschedulablePod(test.PodOptions{
NodeSelector: map[string]string{
v1.LabelInstanceTypeStable: "t3.large",
},
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{resources.AWSPodENI: resource.MustParse("1")},
Limits: v1.ResourceList{resources.AWSPodENI: resource.MustParse("1")},
},
})) {
ExpectNotScheduled(ctx, env.Client, pod)
}
})
// A pod requesting one pod-eni resource with no instance type constraint must
// be scheduled onto a node whose instance type is listed in vpc.Limits and is
// marked as trunking compatible.
It("should launch AWS Pod ENI on a compatible instance type", func() {
	for _, pod := range ExpectProvisioned(ctx, env.Client, scheduler, provisioners, provisioner,
		test.UnschedulablePod(test.PodOptions{
			ResourceRequirements: v1.ResourceRequirements{
				Requests: v1.ResourceList{resources.AWSPodENI: resource.MustParse("1")},
				Limits:   v1.ResourceList{resources.AWSPodENI: resource.MustParse("1")},
			},
		})) {
		node := ExpectScheduled(ctx, env.Client, pod)
		Expect(node.Labels).To(HaveKey(v1.LabelInstanceTypeStable))
		// Assert the two conditions separately so a failure reports whether the
		// instance type was missing from the table or merely not trunking
		// compatible.
		limits, ok := vpc.Limits[node.Labels[v1.LabelInstanceTypeStable]]
		Expect(ok).To(BeTrue())
		Expect(limits.IsTrunkingCompatible).To(BeTrue())
	}
})
It("should launch instances for Nvidia GPU resource requests", func() {
nodeNames := sets.NewString()
for _, pod := range ExpectProvisioned(ctx, env.Client, scheduler, provisioners, provisioner,
Expand Down
4 changes: 4 additions & 0 deletions pkg/cloudprovider/fake/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ func (c *CloudProvider) GetInstanceTypes(_ context.Context, _ *v1alpha5.Constrai
NewInstanceType(InstanceTypeOptions{
name: "default-instance-type",
}),
NewInstanceType(InstanceTypeOptions{
name: "pod-eni-instance-type",
awsPodENI: resource.MustParse("1"),
}),
NewInstanceType(InstanceTypeOptions{
name: "small-instance-type",
cpu: resource.MustParse("2"),
Expand Down
6 changes: 6 additions & 0 deletions pkg/cloudprovider/fake/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ func NewInstanceType(options InstanceTypeOptions) *InstanceType {
nvidiaGPUs: options.nvidiaGPUs,
amdGPUs: options.amdGPUs,
awsNeurons: options.awsNeurons,
awsPodENI: options.awsPodENI,
},
}
}
Expand All @@ -73,6 +74,7 @@ type InstanceTypeOptions struct {
nvidiaGPUs resource.Quantity
amdGPUs resource.Quantity
awsNeurons resource.Quantity
awsPodENI resource.Quantity
}

type InstanceType struct {
Expand Down Expand Up @@ -119,6 +121,10 @@ func (i *InstanceType) AWSNeurons() *resource.Quantity {
return &i.awsNeurons
}

// AWSPodENI returns a pointer to the awsPodENI quantity held by this fake
// instance type.
func (i *InstanceType) AWSPodENI() *resource.Quantity {
return &i.awsPodENI
}

func (i *InstanceType) Overhead() v1.ResourceList {
return v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
Expand Down
1 change: 1 addition & 0 deletions pkg/cloudprovider/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ type InstanceType interface {
NvidiaGPUs() *resource.Quantity
AMDGPUs() *resource.Quantity
AWSNeurons() *resource.Quantity
AWSPodENI() *resource.Quantity
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit challenging. We've been talking about modeling this in a more vendor-neutral way, like:

Resources() map[resource.Quantity]

I'm willing to refactor this later, and accept this as is, though.

Overhead() v1.ResourceList
}

Expand Down
16 changes: 16 additions & 0 deletions pkg/controllers/provisioning/binpacking/packable.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ func PackablesFor(ctx context.Context, instanceTypes []cloudprovider.InstanceTyp
packable.validateArchitecture(constraints),
packable.validateOperatingSystems(constraints),
packable.validateCapacityTypes(constraints),
packable.validateAWSPodENI(pods),
// Although this will remove instances that have GPUs when
// not required, removal of instance types that *lack*
// GPUs will be done later.
Expand Down Expand Up @@ -88,6 +89,7 @@ func PackableFor(i cloudprovider.InstanceType) *Packable {
resources.NvidiaGPU: *i.NvidiaGPUs(),
resources.AMDGPU: *i.AMDGPUs(),
resources.AWSNeuron: *i.AWSNeurons(),
resources.AWSPodENI: *i.AWSPodENI(),
v1.ResourcePods: *i.Pods(),
},
}
Expand Down Expand Up @@ -237,6 +239,20 @@ func (p *Packable) validateAWSNeurons(pods []*v1.Pod) error {
return fmt.Errorf("aws neuron is not required")
}

// validateAWSPodENI returns an error when at least one container of the given
// pods lists the AWS pod ENI resource in its requests while the packable's
// instance type reports a zero pod ENI quantity. It returns nil when no
// container requests the resource or when the instance type offers it.
func (p *Packable) validateAWSPodENI(pods []*v1.Pod) error {
	requested := false
scan:
	for _, pod := range pods {
		for _, container := range pod.Spec.Containers {
			if _, found := container.Resources.Requests[resources.AWSPodENI]; found {
				// One requesting container is enough to decide; stop scanning.
				requested = true
				break scan
			}
		}
	}
	// The instance type is only consulted when the resource was requested.
	if requested && p.InstanceType.AWSPodENI().IsZero() {
		return fmt.Errorf("aws pod eni is required")
	}
	return nil
}

func packableNames(instanceTypes []*Packable) []string {
names := []string{}
for _, instanceType := range instanceTypes {
Expand Down
1 change: 1 addition & 0 deletions pkg/utils/resources/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ const (
NvidiaGPU = "nvidia.com/gpu"
AMDGPU = "amd.com/gpu"
AWSNeuron = "aws.amazon.com/neuron"
AWSPodENI = "vpc.amazonaws.com/pod-eni"
)

// RequestsForPods returns the total resources of a variadic list of podspecs.
Expand Down