Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: add label and ability to schedule for az-id #6216

Merged
merged 19 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ func main() {
op.GetClient(),
op.AMIProvider,
op.SecurityGroupProvider,
op.SubnetProvider,
)
lo.Must0(op.AddHealthzCheck("cloud-provider", awsCloudProvider.LivenessProbe))
cloudProvider := metrics.Decorate(awsCloudProvider)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ require (
k8s.io/utils v0.0.0-20240102154912-e7106e64919e
knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd
sigs.k8s.io/controller-runtime v0.18.2
sigs.k8s.io/karpenter v0.36.1-0.20240521002315-9b145a6d85b4
sigs.k8s.io/karpenter v0.36.1-0.20240524020535-a30f67aaf181
sigs.k8s.io/yaml v1.4.0
)

Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -761,8 +761,8 @@ sigs.k8s.io/controller-runtime v0.18.2 h1:RqVW6Kpeaji67CY5nPEfRz6ZfFMk0lWQlNrLql
sigs.k8s.io/controller-runtime v0.18.2/go.mod h1:tuAt1+wbVsXIT8lPtk5RURxqAnq7xkpv2Mhttslg7Hw=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/karpenter v0.36.1-0.20240521002315-9b145a6d85b4 h1:zIKW8TX593mp/rlOdCqIbgUdVRQGHzeFkgDM6+zgeE8=
sigs.k8s.io/karpenter v0.36.1-0.20240521002315-9b145a6d85b4/go.mod h1:5XYrIz9Bi7HgQyaUsx7O08ft+TJjrH+htlnPq8Sz9J8=
sigs.k8s.io/karpenter v0.36.1-0.20240524020535-a30f67aaf181 h1:OQlVI9wqaV+VW8y13clzV/tM8sEgm0M/Fs/fVsrnRsY=
sigs.k8s.io/karpenter v0.36.1-0.20240524020535-a30f67aaf181/go.mod h1:5XYrIz9Bi7HgQyaUsx7O08ft+TJjrH+htlnPq8Sz9J8=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,9 @@ spec:
zone:
description: The associated availability zone
type: string
zoneID:
description: The associated availability zone ID
type: string
required:
- id
- zone
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/v1beta1/ec2nodeclass_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ type Subnet struct {
// The associated availability zone
// +required
Zone string `json:"zone"`
// The associated availability zone ID
// +optional
ZoneID string `json:"zoneID,omitempty"`
}

// SecurityGroup contains resolved SecurityGroup selector values utilized for node launch
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/v1beta1/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func init() {
LabelInstanceAcceleratorName,
LabelInstanceAcceleratorManufacturer,
LabelInstanceAcceleratorCount,
LabelTopologyZoneID,
v1.LabelWindowsBuild,
)
}
Expand Down Expand Up @@ -94,6 +95,8 @@ var (

LabelNodeClass = Group + "/ec2nodeclass"

LabelTopologyZoneID = "topology.k8s.aws/zone-id"
jmdeal marked this conversation as resolved.
Show resolved Hide resolved

LabelInstanceHypervisor = Group + "/instance-hypervisor"
LabelInstanceEncryptionInTransitSupported = Group + "/instance-encryption-in-transit-supported"
LabelInstanceCategory = Group + "/instance-category"
Expand Down
41 changes: 33 additions & 8 deletions pkg/cloudprovider/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ import (
"github.com/aws/karpenter-provider-aws/pkg/providers/instance"
"github.com/aws/karpenter-provider-aws/pkg/providers/instancetype"
"github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup"
"github.com/aws/karpenter-provider-aws/pkg/providers/subnet"

"sigs.k8s.io/karpenter/pkg/cloudprovider"
)
Expand All @@ -62,18 +61,16 @@ type CloudProvider struct {
instanceProvider instance.Provider
amiProvider amifamily.Provider
securityGroupProvider securitygroup.Provider
subnetProvider subnet.Provider
}

func New(instanceTypeProvider instancetype.Provider, instanceProvider instance.Provider, recorder events.Recorder,
kubeClient client.Client, amiProvider amifamily.Provider, securityGroupProvider securitygroup.Provider, subnetProvider subnet.Provider) *CloudProvider {
kubeClient client.Client, amiProvider amifamily.Provider, securityGroupProvider securitygroup.Provider) *CloudProvider {
return &CloudProvider{
instanceTypeProvider: instanceTypeProvider,
instanceProvider: instanceProvider,
kubeClient: kubeClient,
amiProvider: amiProvider,
securityGroupProvider: securityGroupProvider,
subnetProvider: subnetProvider,
recorder: recorder,
}
}
Expand Down Expand Up @@ -106,7 +103,7 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *corev1beta1.NodeC
instanceType, _ := lo.Find(instanceTypes, func(i *cloudprovider.InstanceType) bool {
return i.Name == instance.Type
})
nc := c.instanceToNodeClaim(instance, instanceType)
nc := c.instanceToNodeClaim(instance, instanceType, nodeClass)
nc.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{
v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
Expand All @@ -125,7 +122,11 @@ func (c *CloudProvider) List(ctx context.Context) ([]*corev1beta1.NodeClaim, err
if err != nil {
return nil, fmt.Errorf("resolving instance type, %w", err)
}
nodeClaims = append(nodeClaims, c.instanceToNodeClaim(instance, instanceType))
nc, err := c.resolveNodeClassFromInstance(ctx, instance)
njtran marked this conversation as resolved.
Show resolved Hide resolved
if client.IgnoreNotFound(err) != nil {
return nil, fmt.Errorf("resolving nodeclass, %w", err)
}
nodeClaims = append(nodeClaims, c.instanceToNodeClaim(instance, instanceType, nc))
}
return nodeClaims, nil
}
Expand All @@ -144,7 +145,11 @@ func (c *CloudProvider) Get(ctx context.Context, providerID string) (*corev1beta
if err != nil {
return nil, fmt.Errorf("resolving instance type, %w", err)
}
return c.instanceToNodeClaim(instance, instanceType), nil
nc, err := c.resolveNodeClassFromInstance(ctx, instance)
if client.IgnoreNotFound(err) != nil {
return nil, fmt.Errorf("resolving nodeclass, %w", err)
}
return c.instanceToNodeClaim(instance, instanceType, nc), nil
}

func (c *CloudProvider) LivenessProbe(req *http.Request) error {
Expand Down Expand Up @@ -279,6 +284,14 @@ func (c *CloudProvider) resolveInstanceTypeFromInstance(ctx context.Context, ins
return instanceType, nil
}

func (c *CloudProvider) resolveNodeClassFromInstance(ctx context.Context, instance *instance.Instance) (*v1beta1.EC2NodeClass, error) {
np, err := c.resolveNodePoolFromInstance(ctx, instance)
if err != nil {
return nil, fmt.Errorf("resolving nodepool, %w", err)
}
return c.resolveNodeClassFromNodePool(ctx, np)
}

func (c *CloudProvider) resolveNodePoolFromInstance(ctx context.Context, instance *instance.Instance) (*corev1beta1.NodePool, error) {
if nodePoolName, ok := instance.Tags[corev1beta1.NodePoolLabelKey]; ok {
nodePool := &corev1beta1.NodePool{}
Expand All @@ -290,7 +303,8 @@ func (c *CloudProvider) resolveNodePoolFromInstance(ctx context.Context, instanc
return nil, errors.NewNotFound(schema.GroupResource{Group: corev1beta1.Group, Resource: "nodepools"}, "")
}

func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType *cloudprovider.InstanceType) *corev1beta1.NodeClaim {
//nolint:gocyclo
func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType *cloudprovider.InstanceType, nodeClass *v1beta1.EC2NodeClass) *corev1beta1.NodeClaim {
nodeClaim := &corev1beta1.NodeClaim{}
labels := map[string]string{}
annotations := map[string]string{}
Expand All @@ -316,6 +330,17 @@ func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType *
nodeClaim.Status.Allocatable = functional.FilterMap(instanceType.Allocatable(), resourceFilter)
}
labels[v1.LabelTopologyZone] = i.Zone
// Attempt to resolve the zoneID from the instance's EC2NodeClass' status condition.
// If the EC2NodeClass is nil, we know we're in the List or Get paths, where we don't care about the zone-id value.
// If we're in the Create path, we've already validated the EC2NodeClass exists. In this case, we resolve the zone-id from the status condition
// both when creating offerings and when adding the label.
jmdeal marked this conversation as resolved.
Show resolved Hide resolved
if nodeClass != nil {
if subnet, ok := lo.Find(nodeClass.Status.Subnets, func(s v1beta1.Subnet) bool {
return s.Zone == i.Zone
}); ok && subnet.ZoneID != "" {
labels[v1beta1.LabelTopologyZoneID] = subnet.ZoneID
}
}
labels[corev1beta1.CapacityTypeLabelKey] = i.CapacityType
if v, ok := i.Tags[corev1beta1.NodePoolLabelKey]; ok {
labels[corev1beta1.NodePoolLabelKey] = v
Expand Down
44 changes: 31 additions & 13 deletions pkg/cloudprovider/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ var _ = BeforeSuite(func() {
fakeClock = clock.NewFakeClock(time.Now())
recorder = events.NewRecorder(&record.FakeRecorder{})
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, recorder,
env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider)
env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider)
cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
prov = provisioning.NewProvisioner(env.Client, recorder, cloudProvider, cluster)
})
Expand Down Expand Up @@ -134,16 +134,19 @@ var _ = Describe("CloudProvider", func() {
},
Subnets: []v1beta1.Subnet{
{
ID: "subnet-test1",
Zone: "test-zone-1a",
ID: "subnet-test1",
Zone: "test-zone-1a",
ZoneID: "tstz1-1a",
},
{
ID: "subnet-test2",
Zone: "test-zone-1b",
ID: "subnet-test2",
Zone: "test-zone-1b",
ZoneID: "tstz1-1b",
},
{
ID: "subnet-test3",
Zone: "test-zone-1c",
ID: "subnet-test3",
Zone: "test-zone-1c",
ZoneID: "tstz1-1c",
},
},
},
Expand Down Expand Up @@ -208,6 +211,21 @@ var _ = Describe("CloudProvider", func() {
Expect(cloudProviderNodeClaim).ToNot(BeNil())
Expect(cloudProviderNodeClaim.Status.ImageID).ToNot(BeEmpty())
})
It("should return availability zone ID as a label on the nodeClaim", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim)
cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim)
Expect(err).ToNot(HaveOccurred())
Expect(cloudProviderNodeClaim).ToNot(BeNil())
zone, ok := cloudProviderNodeClaim.GetLabels()[v1.LabelTopologyZone]
Expect(ok).To(BeTrue())
zoneID, ok := cloudProviderNodeClaim.GetLabels()[v1beta1.LabelTopologyZoneID]
Expect(ok).To(BeTrue())
subnet, ok := lo.Find(nodeClass.Status.Subnets, func(s v1beta1.Subnet) bool {
return s.Zone == zone
})
Expect(ok).To(BeTrue())
Expect(zoneID).To(Equal(subnet.ZoneID))
})
It("should return NodeClass Hash on the nodeClaim", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim)
cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim)
Expand Down Expand Up @@ -1059,9 +1077,9 @@ var _ = Describe("CloudProvider", func() {
It("should launch instances into subnet with the most available IP addresses", func() {
awsEnv.SubnetCache.Flush()
awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{
{SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(10),
{SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(10),
Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-1")}}},
{SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(100),
{SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(100),
Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}},
}})
controller := status.NewController(env.Client, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider)
Expand All @@ -1076,9 +1094,9 @@ var _ = Describe("CloudProvider", func() {
It("should launch instances into subnet with the most available IP addresses in-between cache refreshes", func() {
awsEnv.SubnetCache.Flush()
awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{
{SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(10),
{SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(10),
Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-1")}}},
{SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(11),
{SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(11),
Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}},
}})
controller := status.NewController(env.Client, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider)
Expand Down Expand Up @@ -1112,9 +1130,9 @@ var _ = Describe("CloudProvider", func() {
})
It("should launch instances into subnets that are excluded by another NodePool", func() {
awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{
{SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(10),
{SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(10),
Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-1")}}},
{SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1b"), AvailableIpAddressCount: aws.Int64(100),
{SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1b"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(100),
Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}},
}})
nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{{Tags: map[string]string{"Name": "test-subnet-1"}}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ var _ = BeforeSuite(func() {
env = coretest.NewEnvironment(scheme.Scheme, coretest.WithCRDs(apis.CRDs...))
awsEnv = test.NewEnvironment(ctx, env)
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}),
env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider)
env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider)
garbageCollectionController = garbagecollection.NewController(env.Client, cloudProvider)
})

Expand Down
5 changes: 3 additions & 2 deletions pkg/controllers/nodeclass/status/subnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ func (s *Subnet) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass)
})
nodeClass.Status.Subnets = lo.Map(subnets, func(ec2subnet *ec2.Subnet, _ int) v1beta1.Subnet {
return v1beta1.Subnet{
ID: *ec2subnet.SubnetId,
Zone: *ec2subnet.AvailabilityZone,
ID: *ec2subnet.SubnetId,
Zone: *ec2subnet.AvailabilityZone,
ZoneID: *ec2subnet.AvailabilityZoneId,
}
})

Expand Down
Loading
Loading