From 58e0ee81916f2d30c7534527e059e1bfbaa76e7f Mon Sep 17 00:00:00 2001 From: Reed Schalo Date: Thu, 16 May 2024 15:58:39 -0700 Subject: [PATCH 01/17] add ability to schedule on az-id --- Makefile | 1 + go.mod | 2 ++ hack/validation/labels.sh | 2 +- hack/validation/requirements.sh | 4 +-- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 3 ++ pkg/apis/crds/karpenter.sh_nodeclaims.yaml | 2 +- pkg/apis/crds/karpenter.sh_nodepools.yaml | 4 +-- pkg/apis/v1beta1/ec2nodeclass_status.go | 3 ++ pkg/apis/v1beta1/labels.go | 3 ++ pkg/cloudprovider/cloudprovider.go | 1 + pkg/cloudprovider/suite_test.go | 11 ++++++ pkg/controllers/nodeclass/status/subnet.go | 5 +-- .../providers/instancetype/suite_test.go | 3 +- pkg/providers/instance/instance.go | 21 +++++------ pkg/providers/instancetype/instancetype.go | 27 ++++++++++---- pkg/providers/instancetype/suite_test.go | 18 ++++++---- pkg/providers/instancetype/types.go | 7 ++-- pkg/providers/subnet/subnet.go | 35 ++++++++++++++++--- pkg/providers/subnet/suite_test.go | 1 + test/suites/integration/scheduling_test.go | 17 ++++++++- test/suites/nodeclaim/nodeclaim_test.go | 1 + 21 files changed, 131 insertions(+), 40 deletions(-) diff --git a/Makefile b/Makefile index 1b355fd76f88..0fcda26e4c2d 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,7 @@ run: ## Run Karpenter controller binary against your local cluster CLUSTER_NAME=${CLUSTER_NAME} \ INTERRUPTION_QUEUE=${CLUSTER_NAME} \ FEATURE_GATES="Drift=true" \ + LOG_LEVEL=debug \ go run ./cmd/controller/main.go test: ## Run tests diff --git a/go.mod b/go.mod index c98b111f9092..21a342ec861a 100644 --- a/go.mod +++ b/go.mod @@ -116,3 +116,5 @@ require ( sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) + +replace sigs.k8s.io/karpenter => /Users/schalor/Documents/github.com/rschalo/karpenter diff --git a/hack/validation/labels.sh b/hack/validation/labels.sh index f3b7990bc6d7..1fb9c6293b32 100755 --- 
a/hack/validation/labels.sh +++ b/hack/validation/labels.sh @@ -4,4 +4,4 @@ # ## checking for restricted labels while filtering out well known labels yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.metadata.properties.labels.x-kubernetes-validations += [ - {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self.all(x, x in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\"))"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml \ No newline at end of file + {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self.all(x, x in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", 
\"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\"))"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml \ No newline at end of file diff --git a/hack/validation/requirements.sh b/hack/validation/requirements.sh index 3a74bb0d3962..763d359fab16 100755 --- a/hack/validation/requirements.sh +++ b/hack/validation/requirements.sh @@ -4,9 +4,9 @@ ## checking for restricted labels while filtering out well known labels yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [ - {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml + {"message": "label domain \"karpenter.k8s.aws\" is 
restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml # # Adding validation for nodepool # ## checking for restricted labels while filtering out well known labels yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [ - {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", 
\"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml + {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 01582e9bbbd5..8db886dfb1dd 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -608,6 +608,9 @@ spec: zone: description: The associated availability zone type: string + zoneId: + description: The associated availability zone ID + type: string required: - id - zone diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml index 63dee5756d69..19bdcc94f335 100644 --- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +++ 
b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml @@ -220,7 +220,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This 
field is ALPHA and can be dropped or replaced at any time diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index 20e234581e45..582154bc2e33 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -190,7 +190,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self.all(x, x != "kubernetes.io/hostname") - message: label domain "karpenter.k8s.aws" is restricted - rule: self.all(x, x in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) + rule: self.all(x, x in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", 
"karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) type: object spec: description: NodeClaimSpec describes the desired state of the NodeClaim @@ -348,7 +348,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", 
"karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time diff --git a/pkg/apis/v1beta1/ec2nodeclass_status.go b/pkg/apis/v1beta1/ec2nodeclass_status.go index 55e6f029717d..a574152bff2c 100644 --- a/pkg/apis/v1beta1/ec2nodeclass_status.go +++ b/pkg/apis/v1beta1/ec2nodeclass_status.go @@ -27,6 +27,9 @@ type Subnet struct { // The associated availability zone // +required Zone string `json:"zone"` + // The associated availability zone ID + // +optional + ZoneID string `json:"zoneId"` } // SecurityGroup contains resolved SecurityGroup selector values utilized for node launch diff --git a/pkg/apis/v1beta1/labels.go b/pkg/apis/v1beta1/labels.go index 35bb568ce77e..e9e27bffd015 100644 --- a/pkg/apis/v1beta1/labels.go +++ b/pkg/apis/v1beta1/labels.go @@ -46,6 +46,7 @@ func init() { LabelInstanceAcceleratorName, LabelInstanceAcceleratorManufacturer, LabelInstanceAcceleratorCount, + LabelInstanceAvailabilityZoneID, v1.LabelWindowsBuild, ) } @@ -94,6 +95,8 @@ var ( LabelNodeClass = Group + "/ec2nodeclass" + LabelInstanceAvailabilityZoneID = "topology.k8s.aws/zone-id" + LabelInstanceHypervisor = Group + "/instance-hypervisor" LabelInstanceEncryptionInTransitSupported = Group + "/instance-encryption-in-transit-supported" LabelInstanceCategory = Group + "/instance-category" diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index 32c56e464b7e..c9d561d705ae 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -316,6 +316,7 @@ func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType * nodeClaim.Status.Allocatable = functional.FilterMap(instanceType.Allocatable(), 
resourceFilter) } labels[v1.LabelTopologyZone] = i.Zone + labels[v1beta1.LabelInstanceAvailabilityZoneID] = c.subnetProvider.ZoneInfo()[i.Zone] labels[corev1beta1.CapacityTypeLabelKey] = i.CapacityType if v, ok := i.Tags[corev1beta1.NodePoolLabelKey]; ok { labels[corev1beta1.NodePoolLabelKey] = v diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 8020fcb59a7c..58723e23e413 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -208,6 +208,17 @@ var _ = Describe("CloudProvider", func() { Expect(cloudProviderNodeClaim).ToNot(BeNil()) Expect(cloudProviderNodeClaim.Status.ImageID).ToNot(BeEmpty()) }) + It("should return availability zone ID as a label on the nodeClaim", func() { + ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim) + cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim) + Expect(err).To(BeNil()) + Expect(cloudProviderNodeClaim).ToNot(BeNil()) + zone, ok := cloudProviderNodeClaim.GetLabels()[v1.LabelTopologyZone] + Expect(ok).To(BeTrue()) + zoneID, ok := cloudProviderNodeClaim.GetLabels()[v1beta1.LabelInstanceAvailabilityZoneID] + Expect(ok).To(BeTrue()) + Expect(zoneID).To(Equal(awsEnv.SubnetProvider.ZoneInfo()[zone])) + }) It("should return NodeClass Hash on the nodeClaim", func() { ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim) cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim) diff --git a/pkg/controllers/nodeclass/status/subnet.go b/pkg/controllers/nodeclass/status/subnet.go index 4b05aef0b57d..2f87638ca340 100644 --- a/pkg/controllers/nodeclass/status/subnet.go +++ b/pkg/controllers/nodeclass/status/subnet.go @@ -49,8 +49,9 @@ func (s *Subnet) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) }) nodeClass.Status.Subnets = lo.Map(subnets, func(ec2subnet *ec2.Subnet, _ int) v1beta1.Subnet { return v1beta1.Subnet{ - ID: *ec2subnet.SubnetId, - Zone: *ec2subnet.AvailabilityZone, + ID: *ec2subnet.SubnetId, + Zone: 
*ec2subnet.AvailabilityZone, + ZoneID: *ec2subnet.AvailabilityZoneId, } }) diff --git a/pkg/controllers/providers/instancetype/suite_test.go b/pkg/controllers/providers/instancetype/suite_test.go index 7e1a448eb049..6dc7d0e91a6e 100644 --- a/pkg/controllers/providers/instancetype/suite_test.go +++ b/pkg/controllers/providers/instancetype/suite_test.go @@ -18,6 +18,7 @@ import ( "context" "testing" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" @@ -150,7 +151,7 @@ var _ = Describe("InstanceType", func() { }) Expect(found).To(BeTrue()) for y := range instanceTypes[x].Offerings { - Expect(instanceTypes[x].Offerings[y].Zone).To(Equal(lo.FromPtr(offering.Location))) + Expect(instanceTypes[x].Offerings[y].Constraints[v1.LabelTopologyZone]).To(Equal(lo.FromPtr(offering.Location))) } } }) diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index a897d4f708a9..c8775366efd5 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -298,9 +298,10 @@ func (p *DefaultProvider) getLaunchTemplateConfigs(ctx context.Context, nodeClas if err != nil { return nil, fmt.Errorf("getting launch templates, %w", err) } + requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...) 
for _, launchTemplate := range launchTemplates { launchTemplateConfig := &ec2.FleetLaunchTemplateConfigRequest{ - Overrides: p.getOverrides(launchTemplate.InstanceTypes, zonalSubnets, scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...).Get(v1.LabelTopologyZone), capacityType, launchTemplate.ImageID), + Overrides: p.getOverrides(launchTemplate.InstanceTypes, zonalSubnets, requirements, capacityType, launchTemplate.ImageID), LaunchTemplateSpecification: &ec2.FleetLaunchTemplateSpecificationRequest{ LaunchTemplateName: aws.String(launchTemplate.Name), Version: aws.String("$Latest"), @@ -318,7 +319,7 @@ func (p *DefaultProvider) getLaunchTemplateConfigs(ctx context.Context, nodeClas // getOverrides creates and returns launch template overrides for the cross product of InstanceTypes and subnets (with subnets being constrained by // zones and the offerings in InstanceTypes) -func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, zones *scheduling.Requirement, capacityType string, image string) []*ec2.FleetLaunchTemplateOverridesRequest { +func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, reqs scheduling.Requirements, capacityType string, image string) []*ec2.FleetLaunchTemplateOverridesRequest { // Unwrap all the offerings to a flat slice that includes a pointer // to the parent instance type name type offeringWithParentName struct { @@ -335,16 +336,16 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy }) unwrappedOfferings = append(unwrappedOfferings, ofs...) 
} - var overrides []*ec2.FleetLaunchTemplateOverridesRequest for _, offering := range unwrappedOfferings { - if capacityType != offering.CapacityType { + if capacityType != offering.Constraints[corev1beta1.CapacityTypeLabelKey] { continue } - if !zones.Has(offering.Zone) { + if !reqs.Get(v1.LabelTopologyZone).Has(offering.Constraints[v1.LabelTopologyZone]) || + !reqs.Get(v1beta1.LabelInstanceAvailabilityZoneID).Has(offering.Constraints[v1beta1.LabelInstanceAvailabilityZoneID]) { continue } - subnet, ok := zonalSubnets[offering.Zone] + subnet, ok := zonalSubnets[offering.Constraints[v1.LabelTopologyZone]] if !ok { continue } @@ -377,7 +378,7 @@ func (p *DefaultProvider) getCapacityType(nodeClaim *corev1beta1.NodeClaim, inst if requirements.Get(corev1beta1.CapacityTypeLabelKey).Has(corev1beta1.CapacityTypeSpot) { for _, instanceType := range instanceTypes { for _, offering := range instanceType.Offerings.Available() { - if requirements.Get(v1.LabelTopologyZone).Has(offering.Zone) && offering.CapacityType == corev1beta1.CapacityTypeSpot { + if requirements.Get(v1.LabelTopologyZone).Has(offering.Constraints[v1.LabelTopologyZone]) && offering.Constraints[corev1beta1.CapacityTypeLabelKey] == corev1beta1.CapacityTypeSpot { return corev1beta1.CapacityTypeSpot } } @@ -412,8 +413,8 @@ func (p *DefaultProvider) isMixedCapacityLaunch(nodeClaim *corev1beta1.NodeClaim if requirements.Get(corev1beta1.CapacityTypeLabelKey).Has(corev1beta1.CapacityTypeSpot) { for _, instanceType := range instanceTypes { for _, offering := range instanceType.Offerings.Available() { - if requirements.Get(v1.LabelTopologyZone).Has(offering.Zone) { - if offering.CapacityType == corev1beta1.CapacityTypeSpot { + if requirements.Get(v1.LabelTopologyZone).Has(offering.Constraints[v1.LabelTopologyZone]) { + if offering.Constraints[corev1beta1.CapacityTypeLabelKey] == corev1beta1.CapacityTypeSpot { hasSpotOfferings = true } else { hasODOffering = true @@ -432,7 +433,7 @@ func filterUnwantedSpot(instanceTypes 
[]*cloudprovider.InstanceType) []*cloudpro // first, find the price of our cheapest available on-demand instance type that could support this node for _, it := range instanceTypes { for _, o := range it.Offerings.Available() { - if o.CapacityType == corev1beta1.CapacityTypeOnDemand && o.Price < cheapestOnDemand { + if o.Constraints[corev1beta1.CapacityTypeLabelKey] == corev1beta1.CapacityTypeOnDemand && o.Price < cheapestOnDemand { cheapestOnDemand = o.Price } } diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 1f4765e440b2..c37dc448fc30 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -35,6 +35,7 @@ import ( "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/ec2/ec2iface" "github.com/samber/lo" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" @@ -165,7 +166,8 @@ func (p *DefaultProvider) List(ctx context.Context, kc *corev1beta1.KubeletConfi return NewInstanceType(ctx, i, p.region, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, kc.MaxPods, kc.PodsPerCore, kc.KubeReserved, kc.SystemReserved, kc.EvictionHard, kc.EvictionSoft, - amiFamily, p.createOfferings(ctx, i, p.instanceTypeOfferings[aws.StringValue(i.InstanceType)], allZones, subnetZones)) + amiFamily, p.createOfferings(ctx, i, p.instanceTypeOfferings[aws.StringValue(i.InstanceType)], allZones, subnetZones, p.subnetProvider.ZoneInfo()), + ) }) p.instanceTypesCache.SetDefault(key, result) return result, nil @@ -249,7 +251,7 @@ func (p *DefaultProvider) UpdateInstanceTypeOfferings(ctx context.Context) error return nil } -func (p *DefaultProvider) createOfferings(ctx context.Context, instanceType *ec2.InstanceTypeInfo, instanceTypeZones, zones, subnetZones sets.Set[string]) []cloudprovider.Offering { +func (p *DefaultProvider) 
createOfferings(ctx context.Context, instanceType *ec2.InstanceTypeInfo, instanceTypeZones, zones, subnetZones sets.Set[string], zoneInfo map[string]string) []cloudprovider.Offering { var offerings []cloudprovider.Offering for zone := range zones { // while usage classes should be a distinct set, there's no guarantee of that @@ -271,11 +273,16 @@ func (p *DefaultProvider) createOfferings(ctx context.Context, instanceType *ec2 continue } available := !isUnavailable && ok && instanceTypeZones.Has(zone) && subnetZones.Has(zone) + + // supported on 1.30 and not less offerings = append(offerings, cloudprovider.Offering{ - Zone: zone, - CapacityType: capacityType, - Price: price, - Available: available, + Constraints: map[string]string{ + corev1beta1.CapacityTypeLabelKey: capacityType, + v1.LabelTopologyZone: zone, + v1beta1.LabelInstanceAvailabilityZoneID: zoneInfo[zone], + }, + Price: price, + Available: available, }) instanceTypeOfferingAvailable.With(prometheus.Labels{ instanceTypeLabel: *instanceType.InstanceType, @@ -297,3 +304,11 @@ func (p *DefaultProvider) Reset() { p.instanceTypeOfferings = map[string]sets.Set[string]{} p.instanceTypesCache.Flush() } + +// func requirements(ct, zone, zoneID string) scheduling.Requirements { +// return scheduling.NewLabelRequirements(map[string]string{ +// corev1beta1.CapacityTypeLabelKey: ct, +// v1.LabelTopologyZone: zone, +// v1beta1.LabelInstanceAvailabilityZoneID: zoneID, +// }) +// } diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index e68352e3094d..3d5bcb61544a 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -245,6 +245,7 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceAcceleratorName: "inferentia", v1beta1.LabelInstanceAcceleratorManufacturer: "aws", v1beta1.LabelInstanceAcceleratorCount: "1", + v1beta1.LabelInstanceAvailabilityZoneID: "", // Deprecated Labels 
v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", @@ -296,6 +297,7 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceGPUCount: "1", v1beta1.LabelInstanceGPUMemory: "16384", v1beta1.LabelInstanceLocalNVME: "900", + v1beta1.LabelInstanceAvailabilityZoneID: "", // Deprecated Labels v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", @@ -346,6 +348,7 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceAcceleratorName: "inferentia", v1beta1.LabelInstanceAcceleratorManufacturer: "aws", v1beta1.LabelInstanceAcceleratorCount: "1", + v1beta1.LabelInstanceAvailabilityZoneID: "", // Deprecated Labels v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", @@ -878,8 +881,8 @@ var _ = Describe("InstanceTypeProvider", func() { for _, of := range it.Offerings { metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_available", map[string]string{ "instance_type": it.Name, - "capacity_type": of.CapacityType, - "zone": of.Zone, + "capacity_type": of.Constraints[corev1beta1.CapacityTypeLabelKey], + "zone": of.Constraints[v1.LabelTopologyZone], }) Expect(ok).To(BeTrue()) Expect(metric).To(Not(BeNil())) @@ -896,8 +899,8 @@ var _ = Describe("InstanceTypeProvider", func() { for _, of := range it.Offerings { metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_price_estimate", map[string]string{ "instance_type": it.Name, - "capacity_type": of.CapacityType, - "zone": of.Zone, + "capacity_type": of.Constraints[corev1beta1.CapacityTypeLabelKey], + "zone": of.Constraints[v1.LabelTopologyZone], }) Expect(ok).To(BeTrue()) Expect(metric).To(Not(BeNil())) @@ -2386,18 +2389,19 @@ func generateSpotPricing(cp *cloudprovider.CloudProvider, nodePool *corev1beta1. 
instanceType := it onDemandPrice := 1.00 for _, o := range it.Offerings { - if o.CapacityType == corev1beta1.CapacityTypeOnDemand { + if o.Constraints[corev1beta1.CapacityTypeLabelKey] == corev1beta1.CapacityTypeOnDemand { onDemandPrice = o.Price } } for _, o := range instanceType.Offerings { o := o - if o.CapacityType != corev1beta1.CapacityTypeSpot { + if o.Constraints[corev1beta1.CapacityTypeLabelKey] != corev1beta1.CapacityTypeSpot { continue } + zone := o.Constraints[v1.LabelTopologyZone] spotPrice := fmt.Sprintf("%0.3f", onDemandPrice*0.5) rsp.SpotPriceHistory = append(rsp.SpotPriceHistory, &ec2.SpotPrice{ - AvailabilityZone: &o.Zone, + AvailabilityZone: &zone, InstanceType: &instanceType.Name, SpotPrice: &spotPrice, Timestamp: &t, diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index 7c56717bf82a..68f3ac317ca2 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -78,11 +78,11 @@ func computeRequirements(info *ec2.InstanceTypeInfo, offerings cloudprovider.Off scheduling.NewRequirement(v1.LabelInstanceTypeStable, v1.NodeSelectorOpIn, aws.StringValue(info.InstanceType)), scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, getArchitecture(info)), scheduling.NewRequirement(v1.LabelOSStable, v1.NodeSelectorOpIn, getOS(info, amiFamily)...), - scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { return o.Zone })...), + scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { return o.Constraints[v1.LabelTopologyZone] })...), scheduling.NewRequirement(v1.LabelTopologyRegion, v1.NodeSelectorOpIn, region), scheduling.NewRequirement(v1.LabelWindowsBuild, v1.NodeSelectorOpDoesNotExist), // Well Known to Karpenter - scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, 
lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { return o.CapacityType })...), + scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { return o.Constraints[corev1beta1.CapacityTypeLabelKey] })...), // Well Known to AWS scheduling.NewRequirement(v1beta1.LabelInstanceCPU, v1.NodeSelectorOpIn, fmt.Sprint(aws.Int64Value(info.VCpuInfo.DefaultVCpus))), scheduling.NewRequirement(v1beta1.LabelInstanceCPUManufacturer, v1.NodeSelectorOpDoesNotExist), @@ -103,6 +103,9 @@ func computeRequirements(info *ec2.InstanceTypeInfo, offerings cloudprovider.Off scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpDoesNotExist), scheduling.NewRequirement(v1beta1.LabelInstanceHypervisor, v1.NodeSelectorOpIn, aws.StringValue(info.Hypervisor)), scheduling.NewRequirement(v1beta1.LabelInstanceEncryptionInTransitSupported, v1.NodeSelectorOpIn, fmt.Sprint(aws.BoolValue(info.NetworkInfo.EncryptionInTransitSupported))), + scheduling.NewRequirement(v1beta1.LabelInstanceAvailabilityZoneID, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { + return o.Constraints[v1beta1.LabelInstanceAvailabilityZoneID] + })...), ) // Instance Type Labels instanceFamilyParts := instanceTypeScheme.FindStringSubmatch(aws.StringValue(info.InstanceType)) diff --git a/pkg/providers/subnet/subnet.go b/pkg/providers/subnet/subnet.go index 3eee50827e61..8f0a020fd628 100644 --- a/pkg/providers/subnet/subnet.go +++ b/pkg/providers/subnet/subnet.go @@ -26,10 +26,12 @@ import ( "github.com/mitchellh/hashstructure/v2" "github.com/patrickmn/go-cache" "github.com/samber/lo" + v1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/log" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" + corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/cloudprovider" 
"sigs.k8s.io/karpenter/pkg/utils/pretty" ) @@ -40,6 +42,7 @@ type Provider interface { AssociatePublicIPAddressValue(*v1beta1.EC2NodeClass) *bool ZonalSubnetsForLaunch(context.Context, *v1beta1.EC2NodeClass, []*cloudprovider.InstanceType, string) (map[string]*Subnet, error) UpdateInflightIPs(*ec2.CreateFleetInput, *ec2.CreateFleetOutput, []*cloudprovider.InstanceType, []*Subnet, string) + ZoneInfo() map[string]string } type DefaultProvider struct { @@ -50,11 +53,13 @@ type DefaultProvider struct { associatePublicIPAddressCache *cache.Cache cm *pretty.ChangeMonitor inflightIPs map[string]int64 + zoneInfo map[string]string } type Subnet struct { ID string Zone string + ZoneID string AvailableIPAddressCount int64 } @@ -69,6 +74,8 @@ func NewDefaultProvider(ec2api ec2iface.EC2API, cache *cache.Cache, availableIPA associatePublicIPAddressCache: associatePublicIPAddressCache, // inflightIPs is used to track IPs from known launched instances inflightIPs: map[string]int64{}, + // availabilityZoneIDs is used to track possible az IDs based on discovered subnets + zoneInfo: map[string]string{}, } } @@ -96,6 +103,7 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1beta1.EC2NodeCl if err != nil { return nil, fmt.Errorf("describing subnets %s, %w", pretty.Concise(filters), err) } + availableZones := map[string]string{} for i := range output.Subnets { subnets[lo.FromPtr(output.Subnets[i].SubnetId)] = output.Subnets[i] p.availableIPAddressCache.SetDefault(lo.FromPtr(output.Subnets[i].SubnetId), lo.FromPtr(output.Subnets[i].AvailableIpAddressCount)) @@ -103,7 +111,10 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1beta1.EC2NodeCl // subnets can be leaked here, if a subnets is never called received from ec2 // we are accepting it for now, as this will be an insignificant amount of memory delete(p.inflightIPs, lo.FromPtr(output.Subnets[i].SubnetId)) // remove any previously tracked IP addresses since we just refreshed from EC2 + // add zone name 
to ID comparison as subnets are discovered + availableZones[aws.StringValue(output.Subnets[i].AvailabilityZone)] = aws.StringValue(output.Subnets[i].AvailabilityZoneId) } + p.zoneInfo = availableZones } p.cache.SetDefault(fmt.Sprint(hash), lo.Values(subnets)) if p.cm.HasChanged(fmt.Sprintf("subnets/%s", nodeClass.Name), subnets) { @@ -161,11 +172,16 @@ func (p *DefaultProvider) ZonalSubnetsForLaunch(ctx context.Context, nodeClass * continue } } - zonalSubnets[subnet.Zone] = &Subnet{ID: subnet.ID, Zone: subnet.Zone, AvailableIPAddressCount: availableIPAddressCount[subnet.ID]} + zonalSubnets[subnet.Zone] = &Subnet{ID: subnet.ID, Zone: subnet.Zone, ZoneID: subnet.ZoneID, AvailableIPAddressCount: availableIPAddressCount[subnet.ID]} } for _, subnet := range zonalSubnets { - predictedIPsUsed := p.minPods(instanceTypes, subnet.Zone, capacityType) + constraints := map[string]string{ + corev1beta1.CapacityTypeLabelKey: capacityType, + v1.LabelTopologyZone: subnet.Zone, + v1beta1.LabelInstanceAvailabilityZoneID: subnet.ZoneID, + } + predictedIPsUsed := p.minPods(instanceTypes, constraints) prevIPs := subnet.AvailableIPAddressCount if trackedIPs, ok := p.inflightIPs[subnet.ID]; ok { prevIPs = trackedIPs @@ -227,13 +243,22 @@ func (p *DefaultProvider) UpdateInflightIPs(createFleetInput *ec2.CreateFleetInp if originalSubnet.AvailableIPAddressCount == cachedIPAddressCount { // other IPs deducted were opportunistic and need to be readded since Fleet didn't pick those subnets to launch into if ips, ok := p.inflightIPs[originalSubnet.ID]; ok { - minPods := p.minPods(instanceTypes, originalSubnet.Zone, capacityType) + constraints := map[string]string{ + corev1beta1.CapacityTypeLabelKey: capacityType, + v1.LabelTopologyZone: originalSubnet.Zone, + v1beta1.LabelInstanceAvailabilityZoneID: originalSubnet.ZoneID, + } + minPods := p.minPods(instanceTypes, constraints) p.inflightIPs[originalSubnet.ID] = ips + minPods } } } } +func (p *DefaultProvider) ZoneInfo() map[string]string { + 
return p.zoneInfo +} + func (p *DefaultProvider) LivenessProbe(_ *http.Request) error { p.Lock() //nolint: staticcheck @@ -241,10 +266,10 @@ func (p *DefaultProvider) LivenessProbe(_ *http.Request) error { return nil } -func (p *DefaultProvider) minPods(instanceTypes []*cloudprovider.InstanceType, zone string, capacityType string) int64 { +func (p *DefaultProvider) minPods(instanceTypes []*cloudprovider.InstanceType, constraints map[string]string) int64 { // filter for instance types available in the zone and capacity type being requested filteredInstanceTypes := lo.Filter(instanceTypes, func(it *cloudprovider.InstanceType, _ int) bool { - offering, ok := it.Offerings.Get(capacityType, zone) + offering, ok := it.Offerings.Get(constraints) if !ok { return false } diff --git a/pkg/providers/subnet/suite_test.go b/pkg/providers/subnet/suite_test.go index 57d603863e44..e1af4e082a29 100644 --- a/pkg/providers/subnet/suite_test.go +++ b/pkg/providers/subnet/suite_test.go @@ -107,6 +107,7 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test1"), AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr(""), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) diff --git a/test/suites/integration/scheduling_test.go b/test/suites/integration/scheduling_test.go index 153eef2a4ba2..067dac9cbc93 100644 --- a/test/suites/integration/scheduling_test.go +++ b/test/suites/integration/scheduling_test.go @@ -37,7 +37,7 @@ import ( . 
"github.com/onsi/gomega" ) -var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { +var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { var selectors sets.Set[string] BeforeEach(func() { @@ -108,6 +108,21 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) env.ExpectCreatedNodeCount("==", 1) }) + It("should support well-known labels for zone id selection", func() { + selectors.Insert(v1beta1.LabelInstanceAvailabilityZoneID) // Add node selector keys to selectors used in testing to ensure we test all labels + deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ + NodeRequirements: []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceAvailabilityZoneID, + Operator: v1.NodeSelectorOpIn, + Values: []string{"usw2-az3"}, + }, + }, + }}) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("==", 1) + }) It("should support well-known labels for local NVME storage", func() { selectors.Insert(v1beta1.LabelInstanceLocalNVME) // Add node selector keys to selectors used in testing to ensure we test all labels deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ diff --git a/test/suites/nodeclaim/nodeclaim_test.go b/test/suites/nodeclaim/nodeclaim_test.go index d4e81e4528cd..a6f542416f7e 100644 --- a/test/suites/nodeclaim/nodeclaim_test.go +++ b/test/suites/nodeclaim/nodeclaim_test.go @@ -165,6 +165,7 @@ var _ = Describe("StandaloneNodeClaim", func() { node := env.EventuallyExpectInitializedNodeCount("==", 1)[0] Expect(node.Annotations).To(HaveKeyWithValue("custom-annotation", "custom-value"))
Expect(node.Labels).To(HaveKeyWithValue("custom-label", "custom-value")) + Expect(node.Labels).To(HaveKey("topology.k8s.aws/zone-id")) Expect(node.Spec.Taints).To(ContainElements( v1.Taint{ Key: "custom-taint", From 34a547696e853be0daf85036d642f48ed78aef53 Mon Sep 17 00:00:00 2001 From: Reed Schalo Date: Tue, 21 May 2024 17:32:03 -0700 Subject: [PATCH 02/17] middle of fixing unit tests, TODO: write e2etests --- go.sum | 2 - .../nodeclass/status/subnet_test.go | 131 +++++++++++------- .../providers/instancetype/suite_test.go | 3 +- pkg/fake/ec2api.go | 4 + pkg/providers/instance/instance.go | 16 +-- pkg/providers/instancetype/instancetype.go | 20 +-- pkg/providers/instancetype/suite_test.go | 14 +- pkg/providers/instancetype/types.go | 10 +- pkg/providers/subnet/subnet.go | 29 ++-- pkg/providers/subnet/suite_test.go | 4 + 10 files changed, 130 insertions(+), 103 deletions(-) diff --git a/go.sum b/go.sum index 579c0c65e186..c7ad2ee991b4 100644 --- a/go.sum +++ b/go.sum @@ -761,8 +761,6 @@ sigs.k8s.io/controller-runtime v0.18.2 h1:RqVW6Kpeaji67CY5nPEfRz6ZfFMk0lWQlNrLql sigs.k8s.io/controller-runtime v0.18.2/go.mod h1:tuAt1+wbVsXIT8lPtk5RURxqAnq7xkpv2Mhttslg7Hw= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/karpenter v0.36.1-0.20240521002315-9b145a6d85b4 h1:zIKW8TX593mp/rlOdCqIbgUdVRQGHzeFkgDM6+zgeE8= -sigs.k8s.io/karpenter v0.36.1-0.20240521002315-9b145a6d85b4/go.mod h1:5XYrIz9Bi7HgQyaUsx7O08ft+TJjrH+htlnPq8Sz9J8= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/pkg/controllers/nodeclass/status/subnet_test.go b/pkg/controllers/nodeclass/status/subnet_test.go index 
2754faae6911..5658e0fc7cce 100644 --- a/pkg/controllers/nodeclass/status/subnet_test.go +++ b/pkg/controllers/nodeclass/status/subnet_test.go @@ -55,44 +55,51 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, { - ID: "subnet-test4", - Zone: "test-zone-1a-local", + ID: "subnet-test4", + Zone: "test-zone-1a-local", + ZoneID: "tstz1-1alocal", }, })) }) It("Should have the correct ordering for the Subnets", func() { awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{ - {SubnetId: aws.String("subnet-test1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(20)}, - {SubnetId: aws.String("subnet-test2"), AvailabilityZone: aws.String("test-zone-1b"), AvailableIpAddressCount: aws.Int64(100)}, - {SubnetId: aws.String("subnet-test3"), AvailabilityZone: aws.String("test-zone-1c"), AvailableIpAddressCount: aws.Int64(50)}, + {SubnetId: aws.String("subnet-test1"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(20)}, + {SubnetId: aws.String("subnet-test2"), AvailabilityZone: aws.String("test-zone-1b"), AvailabilityZoneId: aws.String("tstz1-1b"), AvailableIpAddressCount: aws.Int64(100)}, + {SubnetId: aws.String("subnet-test3"), AvailabilityZone: aws.String("test-zone-1c"), AvailabilityZoneId: aws.String("tstz1-1c"), AvailableIpAddressCount: aws.Int64(50)}, }}) ExpectApplied(ctx, env.Client, nodeClass) ExpectObjectReconciled(ctx, env.Client, statusController, 
nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, })) }) @@ -110,12 +117,14 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, })) }) @@ -130,8 +139,9 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, })) }) @@ -141,20 +151,24 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, { - ID: "subnet-test4", - Zone: "test-zone-1a-local", + ID: "subnet-test4", + Zone: "test-zone-1a-local", + ZoneID: "tstz1-1alocal", }, })) @@ -175,12 +189,14 @@ var _ = 
Describe("NodeClass Subnet Status Controller", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, })) }) @@ -190,20 +206,24 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, { - ID: "subnet-test4", - Zone: "test-zone-1a-local", + ID: "subnet-test4", + Zone: "test-zone-1a-local", + ZoneID: "tstz1-1alocal", }, })) @@ -217,8 +237,9 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, })) }) @@ -241,20 +262,24 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, { - ID: 
"subnet-test4", - Zone: "test-zone-1a-local", + ID: "subnet-test4", + Zone: "test-zone-1a-local", + ZoneID: "tstz1-1alocal", }, })) diff --git a/pkg/controllers/providers/instancetype/suite_test.go b/pkg/controllers/providers/instancetype/suite_test.go index 6dc7d0e91a6e..5b0fba5fc2a1 100644 --- a/pkg/controllers/providers/instancetype/suite_test.go +++ b/pkg/controllers/providers/instancetype/suite_test.go @@ -18,7 +18,6 @@ import ( "context" "testing" - v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" @@ -151,7 +150,7 @@ var _ = Describe("InstanceType", func() { }) Expect(found).To(BeTrue()) for y := range instanceTypes[x].Offerings { - Expect(instanceTypes[x].Offerings[y].Constraints[v1.LabelTopologyZone]).To(Equal(lo.FromPtr(offering.Location))) + Expect(instanceTypes[x].Offerings[y].Zone()).To(Equal(lo.FromPtr(offering.Location))) } } }) diff --git a/pkg/fake/ec2api.go b/pkg/fake/ec2api.go index ce9cf21e0087..63b349a3a46e 100644 --- a/pkg/fake/ec2api.go +++ b/pkg/fake/ec2api.go @@ -418,6 +418,7 @@ func (e *EC2API) DescribeSubnetsWithContext(_ context.Context, input *ec2.Descri { SubnetId: aws.String("subnet-test1"), AvailabilityZone: aws.String("test-zone-1a"), + AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(100), MapPublicIpOnLaunch: aws.Bool(false), Tags: []*ec2.Tag{ @@ -428,6 +429,7 @@ func (e *EC2API) DescribeSubnetsWithContext(_ context.Context, input *ec2.Descri { SubnetId: aws.String("subnet-test2"), AvailabilityZone: aws.String("test-zone-1b"), + AvailabilityZoneId: aws.String("tstz1-1b"), AvailableIpAddressCount: aws.Int64(100), MapPublicIpOnLaunch: aws.Bool(true), Tags: []*ec2.Tag{ @@ -438,6 +440,7 @@ func (e *EC2API) DescribeSubnetsWithContext(_ context.Context, input *ec2.Descri { SubnetId: aws.String("subnet-test3"), AvailabilityZone: aws.String("test-zone-1c"), + AvailabilityZoneId: 
aws.String("tstz1-1c"), AvailableIpAddressCount: aws.Int64(100), Tags: []*ec2.Tag{ {Key: aws.String("Name"), Value: aws.String("test-subnet-3")}, @@ -448,6 +451,7 @@ func (e *EC2API) DescribeSubnetsWithContext(_ context.Context, input *ec2.Descri { SubnetId: aws.String("subnet-test4"), AvailabilityZone: aws.String("test-zone-1a-local"), + AvailabilityZoneId: aws.String("tstz1-1alocal"), AvailableIpAddressCount: aws.Int64(100), MapPublicIpOnLaunch: aws.Bool(true), Tags: []*ec2.Tag{ diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index c8775366efd5..61fa67ee4ff9 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -338,14 +338,14 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy } var overrides []*ec2.FleetLaunchTemplateOverridesRequest for _, offering := range unwrappedOfferings { - if capacityType != offering.Constraints[corev1beta1.CapacityTypeLabelKey] { + if capacityType != offering.CapacityType() { continue } - if !reqs.Get(v1.LabelTopologyZone).Has(offering.Constraints[v1.LabelTopologyZone]) || - !reqs.Get(v1beta1.LabelInstanceAvailabilityZoneID).Has(offering.Constraints[v1beta1.LabelInstanceAvailabilityZoneID]) { + if !reqs.Get(v1.LabelTopologyZone).Has(offering.Zone()) || + !reqs.Get(v1beta1.LabelInstanceAvailabilityZoneID).Has(offering.Requirements.Get(v1beta1.LabelInstanceAvailabilityZoneID).Any()) { continue } - subnet, ok := zonalSubnets[offering.Constraints[v1.LabelTopologyZone]] + subnet, ok := zonalSubnets[offering.Zone()] if !ok { continue } @@ -378,7 +378,7 @@ func (p *DefaultProvider) getCapacityType(nodeClaim *corev1beta1.NodeClaim, inst if requirements.Get(corev1beta1.CapacityTypeLabelKey).Has(corev1beta1.CapacityTypeSpot) { for _, instanceType := range instanceTypes { for _, offering := range instanceType.Offerings.Available() { - if requirements.Get(v1.LabelTopologyZone).Has(offering.Constraints[v1.LabelTopologyZone]) && offering.Constraints[corev1beta1.CapacityTypeLabelKey] ==
corev1beta1.CapacityTypeSpot { + if requirements.Get(v1.LabelTopologyZone).Has(offering.Zone()) && offering.CapacityType() == corev1beta1.CapacityTypeSpot { return corev1beta1.CapacityTypeSpot } } @@ -413,8 +413,8 @@ func (p *DefaultProvider) isMixedCapacityLaunch(nodeClaim *corev1beta1.NodeClaim if requirements.Get(corev1beta1.CapacityTypeLabelKey).Has(corev1beta1.CapacityTypeSpot) { for _, instanceType := range instanceTypes { for _, offering := range instanceType.Offerings.Available() { - if requirements.Get(v1.LabelTopologyZone).Has(offering.Constraints[v1.LabelTopologyZone]) { - if offering.Constraints[corev1beta1.CapacityTypeLabelKey] == corev1beta1.CapacityTypeSpot { + if requirements.Get(v1.LabelTopologyZone).Has(offering.Zone()) { + if offering.CapacityType() == corev1beta1.CapacityTypeSpot { hasSpotOfferings = true } else { hasODOffering = true @@ -433,7 +433,7 @@ func filterUnwantedSpot(instanceTypes []*cloudprovider.InstanceType) []*cloudpro // first, find the price of our cheapest available on-demand instance type that could support this node for _, it := range instanceTypes { for _, o := range it.Offerings.Available() { - if o.Constraints[corev1beta1.CapacityTypeLabelKey] == corev1beta1.CapacityTypeOnDemand && o.Price < cheapestOnDemand { + if o.CapacityType() == corev1beta1.CapacityTypeOnDemand && o.Price < cheapestOnDemand { cheapestOnDemand = o.Price } } diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index c37dc448fc30..277b448b00d8 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -27,6 +27,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" + "sigs.k8s.io/karpenter/pkg/scheduling" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" awscache "github.com/aws/karpenter-provider-aws/pkg/cache" @@ -274,13 +275,12 @@ func (p *DefaultProvider) createOfferings(ctx 
context.Context, instanceType *ec2 } available := !isUnavailable && ok && instanceTypeZones.Has(zone) && subnetZones.Has(zone) - // supported on 1.30 and not less offerings = append(offerings, cloudprovider.Offering{ - Constraints: map[string]string{ - corev1beta1.CapacityTypeLabelKey: capacityType, - v1.LabelTopologyZone: zone, - v1beta1.LabelInstanceAvailabilityZoneID: zoneInfo[zone], - }, + Requirements: scheduling.NewRequirements( + scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), + scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, zone), + scheduling.NewRequirement(v1beta1.LabelInstanceAvailabilityZoneID, v1.NodeSelectorOpIn, zoneInfo[zone]), + ), Price: price, Available: available, }) @@ -304,11 +304,3 @@ func (p *DefaultProvider) Reset() { p.instanceTypeOfferings = map[string]sets.Set[string]{} p.instanceTypesCache.Flush() } - -// func requirements(ct, zone, zoneID string) scheduling.Requirements { -// return scheduling.NewLabelRequirements(map[string]string{ -// corev1beta1.CapacityTypeLabelKey: ct, -// v1.LabelTopologyZone: zone, -// v1beta1.LabelInstanceAvailabilityZoneID: zoneID, -// }) -// } diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 3d5bcb61544a..be057c502869 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -881,8 +881,8 @@ var _ = Describe("InstanceTypeProvider", func() { for _, of := range it.Offerings { metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_available", map[string]string{ "instance_type": it.Name, - "capacity_type": of.Constraints[corev1beta1.CapacityTypeLabelKey], - "zone": of.Constraints[v1.LabelTopologyZone], + "capacity_type": of.CapacityType(), + "zone": of.Zone(), }) Expect(ok).To(BeTrue()) Expect(metric).To(Not(BeNil())) @@ -899,8 +899,8 @@ var _ = Describe("InstanceTypeProvider", func() { for _, of := range 
it.Offerings { metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_price_estimate", map[string]string{ "instance_type": it.Name, - "capacity_type": of.Constraints[corev1beta1.CapacityTypeLabelKey], - "zone": of.Constraints[v1.LabelTopologyZone], + "capacity_type": of.CapacityType(), + "zone": of.Zone(), }) Expect(ok).To(BeTrue()) Expect(metric).To(Not(BeNil())) @@ -2389,16 +2389,16 @@ func generateSpotPricing(cp *cloudprovider.CloudProvider, nodePool *corev1beta1. instanceType := it onDemandPrice := 1.00 for _, o := range it.Offerings { - if o.Constraints[corev1beta1.CapacityTypeLabelKey] == corev1beta1.CapacityTypeOnDemand { + if o.CapacityType() == corev1beta1.CapacityTypeOnDemand { onDemandPrice = o.Price } } for _, o := range instanceType.Offerings { o := o - if o.Constraints[corev1beta1.CapacityTypeLabelKey] != corev1beta1.CapacityTypeSpot { + if o.CapacityType() != corev1beta1.CapacityTypeSpot { continue } - zone := o.Constraints[v1.LabelTopologyZone] + zone := o.Zone() spotPrice := fmt.Sprintf("%0.3f", onDemandPrice*0.5) rsp.SpotPriceHistory = append(rsp.SpotPriceHistory, &ec2.SpotPrice{ AvailabilityZone: &zone, diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index 68f3ac317ca2..be68681af08d 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -78,11 +78,15 @@ func computeRequirements(info *ec2.InstanceTypeInfo, offerings cloudprovider.Off scheduling.NewRequirement(v1.LabelInstanceTypeStable, v1.NodeSelectorOpIn, aws.StringValue(info.InstanceType)), scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, getArchitecture(info)), scheduling.NewRequirement(v1.LabelOSStable, v1.NodeSelectorOpIn, getOS(info, amiFamily)...), - scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { return o.Constraints[v1.LabelTopologyZone] })...), + 
scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { + return o.Zone() + })...), scheduling.NewRequirement(v1.LabelTopologyRegion, v1.NodeSelectorOpIn, region), scheduling.NewRequirement(v1.LabelWindowsBuild, v1.NodeSelectorOpDoesNotExist), // Well Known to Karpenter - scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { return o.Constraints[corev1beta1.CapacityTypeLabelKey] })...), + scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { + return o.CapacityType() + })...), // Well Known to AWS scheduling.NewRequirement(v1beta1.LabelInstanceCPU, v1.NodeSelectorOpIn, fmt.Sprint(aws.Int64Value(info.VCpuInfo.DefaultVCpus))), scheduling.NewRequirement(v1beta1.LabelInstanceCPUManufacturer, v1.NodeSelectorOpDoesNotExist), @@ -104,7 +108,7 @@ func computeRequirements(info *ec2.InstanceTypeInfo, offerings cloudprovider.Off scheduling.NewRequirement(v1beta1.LabelInstanceHypervisor, v1.NodeSelectorOpIn, aws.StringValue(info.Hypervisor)), scheduling.NewRequirement(v1beta1.LabelInstanceEncryptionInTransitSupported, v1.NodeSelectorOpIn, fmt.Sprint(aws.BoolValue(info.NetworkInfo.EncryptionInTransitSupported))), scheduling.NewRequirement(v1beta1.LabelInstanceAvailabilityZoneID, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { - return o.Constraints[v1beta1.LabelInstanceAvailabilityZoneID] + return o.Requirements.Get(v1beta1.LabelInstanceAvailabilityZoneID).Values()[0] })...), ) // Instance Type Labels diff --git a/pkg/providers/subnet/subnet.go b/pkg/providers/subnet/subnet.go index 8f0a020fd628..65b21e272320 100644 --- a/pkg/providers/subnet/subnet.go +++ b/pkg/providers/subnet/subnet.go @@ -33,6 +33,7 @@ import ( corev1beta1 
"sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/scheduling" "sigs.k8s.io/karpenter/pkg/utils/pretty" ) @@ -176,12 +177,12 @@ func (p *DefaultProvider) ZonalSubnetsForLaunch(ctx context.Context, nodeClass * } for _, subnet := range zonalSubnets { - constraints := map[string]string{ - corev1beta1.CapacityTypeLabelKey: capacityType, - v1.LabelTopologyZone: subnet.Zone, - v1beta1.LabelInstanceAvailabilityZoneID: subnet.ZoneID, - } - predictedIPsUsed := p.minPods(instanceTypes, constraints) + reqs := scheduling.NewRequirements( + scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), + scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, subnet.Zone), + scheduling.NewRequirement(v1beta1.LabelInstanceAvailabilityZoneID, v1.NodeSelectorOpIn, subnet.ZoneID), + ) + predictedIPsUsed := p.minPods(instanceTypes, reqs) prevIPs := subnet.AvailableIPAddressCount if trackedIPs, ok := p.inflightIPs[subnet.ID]; ok { prevIPs = trackedIPs @@ -243,12 +244,12 @@ func (p *DefaultProvider) UpdateInflightIPs(createFleetInput *ec2.CreateFleetInp if originalSubnet.AvailableIPAddressCount == cachedIPAddressCount { // other IPs deducted were opportunistic and need to be readded since Fleet didn't pick those subnets to launch into if ips, ok := p.inflightIPs[originalSubnet.ID]; ok { - constraints := map[string]string{ - corev1beta1.CapacityTypeLabelKey: capacityType, - v1.LabelTopologyZone: originalSubnet.Zone, - v1beta1.LabelInstanceAvailabilityZoneID: originalSubnet.ZoneID, - } - minPods := p.minPods(instanceTypes, constraints) + reqs := scheduling.NewRequirements( + scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), + scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, originalSubnet.Zone), + scheduling.NewRequirement(v1beta1.LabelInstanceAvailabilityZoneID, v1.NodeSelectorOpIn, originalSubnet.ZoneID), + ) + 
minPods := p.minPods(instanceTypes, reqs) p.inflightIPs[originalSubnet.ID] = ips + minPods } } @@ -266,10 +267,10 @@ func (p *DefaultProvider) LivenessProbe(_ *http.Request) error { return nil } -func (p *DefaultProvider) minPods(instanceTypes []*cloudprovider.InstanceType, constraints map[string]string) int64 { +func (p *DefaultProvider) minPods(instanceTypes []*cloudprovider.InstanceType, reqs scheduling.Requirements) int64 { // filter for instance types available in the zone and capacity type being requested filteredInstanceTypes := lo.Filter(instanceTypes, func(it *cloudprovider.InstanceType, _ int) bool { - offering, ok := it.Offerings.Get(constraints) + offering, ok := it.Offerings.Get(reqs) if !ok { return false } diff --git a/pkg/providers/subnet/suite_test.go b/pkg/providers/subnet/suite_test.go index e1af4e082a29..ac63b79e34ff 100644 --- a/pkg/providers/subnet/suite_test.go +++ b/pkg/providers/subnet/suite_test.go @@ -334,6 +334,7 @@ var _ = Describe("SubnetProvider", func() { Expect(subnets).To(BeEquivalentTo([]*ec2.Subnet{ { AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), AvailableIpAddressCount: lo.ToPtr[int64](100), SubnetId: lo.ToPtr("subnet-test1"), MapPublicIpOnLaunch: lo.ToPtr(false), @@ -350,6 +351,7 @@ var _ = Describe("SubnetProvider", func() { }, { AvailabilityZone: lo.ToPtr("test-zone-1b"), + AvailabilityZoneId: lo.ToPtr("tstz1-1b"), AvailableIpAddressCount: lo.ToPtr[int64](100), MapPublicIpOnLaunch: lo.ToPtr(true), SubnetId: lo.ToPtr("subnet-test2"), @@ -367,6 +369,7 @@ var _ = Describe("SubnetProvider", func() { }, { AvailabilityZone: lo.ToPtr("test-zone-1c"), + AvailabilityZoneId: lo.ToPtr("tstz1-1c"), AvailableIpAddressCount: lo.ToPtr[int64](100), SubnetId: lo.ToPtr("subnet-test3"), Tags: []*ec2.Tag{ @@ -385,6 +388,7 @@ var _ = Describe("SubnetProvider", func() { }, { AvailabilityZone: lo.ToPtr("test-zone-1a-local"), + AvailabilityZoneId: lo.ToPtr("tstz1-1alocal"), AvailableIpAddressCount: 
lo.ToPtr[int64](100), SubnetId: lo.ToPtr("subnet-test4"), MapPublicIpOnLaunch: lo.ToPtr(true), From 7949428e9b4f756b593097fbf72c4ecbd045351a Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Tue, 21 May 2024 23:49:04 -0700 Subject: [PATCH 03/17] deps (temp): pin to upstream PR --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 21a342ec861a..5b5115bca4aa 100644 --- a/go.mod +++ b/go.mod @@ -117,4 +117,4 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) -replace sigs.k8s.io/karpenter => /Users/schalor/Documents/github.com/rschalo/karpenter +replace sigs.k8s.io/karpenter => github.com/rschalo/karpenter v0.0.0-20240521231044-957333ae5ef7 diff --git a/go.sum b/go.sum index c7ad2ee991b4..ced4dcd44f31 100644 --- a/go.sum +++ b/go.sum @@ -331,6 +331,8 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/rschalo/karpenter v0.0.0-20240521231044-957333ae5ef7 h1:4akrWjwsoM0EH2dtF8UklHnBa6IIE1+6icY+WMt7Rr4= +github.com/rschalo/karpenter v0.0.0-20240521231044-957333ae5ef7/go.mod h1:5XYrIz9Bi7HgQyaUsx7O08ft+TJjrH+htlnPq8Sz9J8= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA= github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= From f5ac78dc585c55317f1d572023163262c7553b98 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Tue, 21 May 2024 23:50:49 -0700 Subject: [PATCH 04/17] chore: misc functional fixes --- pkg/cloudprovider/suite_test.go | 12 
++++++------ pkg/fake/ec2api.go | 8 ++++---- pkg/providers/instance/instance.go | 16 ++++++++-------- pkg/providers/instancetype/suite_test.go | 6 +++--- pkg/providers/subnet/subnet.go | 2 +- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 58723e23e413..87e2fc7aabb1 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -1070,9 +1070,9 @@ var _ = Describe("CloudProvider", func() { It("should launch instances into subnet with the most available IP addresses", func() { awsEnv.SubnetCache.Flush() awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{ - {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(10), + {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(10), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-1")}}}, - {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(100), + {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(100), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) controller := status.NewController(env.Client, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) @@ -1087,9 +1087,9 @@ var _ = Describe("CloudProvider", func() { It("should launch instances into subnet with the most available IP addresses in-between cache refreshes", func() { awsEnv.SubnetCache.Flush() awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{ - {SubnetId: 
aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(10), + {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(10), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-1")}}}, - {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(11), + {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(11), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) controller := status.NewController(env.Client, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) @@ -1123,9 +1123,9 @@ var _ = Describe("CloudProvider", func() { }) It("should launch instances into subnets that are excluded by another NodePool", func() { awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{ - {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(10), + {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(10), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-1")}}}, - {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1b"), AvailableIpAddressCount: aws.Int64(100), + {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1b"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(100), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) nodeClass.Spec.SubnetSelectorTerms = 
[]v1beta1.SubnetSelectorTerm{{Tags: map[string]string{"Name": "test-subnet-1"}}} diff --git a/pkg/fake/ec2api.go b/pkg/fake/ec2api.go index 63b349a3a46e..654e986084da 100644 --- a/pkg/fake/ec2api.go +++ b/pkg/fake/ec2api.go @@ -517,10 +517,10 @@ func (e *EC2API) DescribeAvailabilityZonesWithContext(context.Context, *ec2.Desc return e.DescribeAvailabilityZonesOutput.Clone(), nil } return &ec2.DescribeAvailabilityZonesOutput{AvailabilityZones: []*ec2.AvailabilityZone{ - {ZoneName: aws.String("test-zone-1a"), ZoneId: aws.String("testzone1a"), ZoneType: aws.String("availability-zone")}, - {ZoneName: aws.String("test-zone-1b"), ZoneId: aws.String("testzone1b"), ZoneType: aws.String("availability-zone")}, - {ZoneName: aws.String("test-zone-1c"), ZoneId: aws.String("testzone1c"), ZoneType: aws.String("availability-zone")}, - {ZoneName: aws.String("test-zone-1a-local"), ZoneId: aws.String("testzone1alocal"), ZoneType: aws.String("local-zone")}, + {ZoneName: aws.String("test-zone-1a"), ZoneId: aws.String("tstz1-1a"), ZoneType: aws.String("availability-zone")}, + {ZoneName: aws.String("test-zone-1b"), ZoneId: aws.String("tstz1-1b"), ZoneType: aws.String("availability-zone")}, + {ZoneName: aws.String("test-zone-1c"), ZoneId: aws.String("tstz1-1c"), ZoneType: aws.String("availability-zone")}, + {ZoneName: aws.String("test-zone-1a-local"), ZoneId: aws.String("tstz1-1alocal"), ZoneType: aws.String("local-zone")}, }}, nil } diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index 61fa67ee4ff9..c00c0f9ff96a 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -341,8 +341,7 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy if capacityType != offering.CapacityType() { continue } - if !reqs.Get(v1.LabelTopologyZone).Has(offering.Zone()) || - !reqs.Get(v1beta1.LabelInstanceAvailabilityZoneID).Has(offering.CapacityType()) { + if !reqs.Get(v1.LabelTopologyZone).Has(offering.Zone()) 
|| !reqs.Get(v1beta1.LabelInstanceAvailabilityZoneID).Has(offering.Requirements.Get(v1beta1.LabelInstanceAvailabilityZoneID).Values()[0]) { continue } subnet, ok := zonalSubnets[offering.Zone()] @@ -413,12 +412,13 @@ func (p *DefaultProvider) isMixedCapacityLaunch(nodeClaim *corev1beta1.NodeClaim if requirements.Get(corev1beta1.CapacityTypeLabelKey).Has(corev1beta1.CapacityTypeSpot) { for _, instanceType := range instanceTypes { for _, offering := range instanceType.Offerings.Available() { - if requirements.Get(v1.LabelTopologyZone).Has(offering.Zone()) { - if offering.CapacityType() == corev1beta1.CapacityTypeSpot { - hasSpotOfferings = true - } else { - hasODOffering = true - } + if !requirements.Get(v1.LabelTopologyZone).Has(offering.Zone()) || !requirements.Get(v1beta1.LabelInstanceAvailabilityZoneID).Has(offering.Requirements.Get(v1beta1.LabelInstanceAvailabilityZoneID).Values()[0]) { + continue + } + if offering.CapacityType() == corev1beta1.CapacityTypeSpot { + hasSpotOfferings = true + } else { + hasODOffering = true } } } diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index be057c502869..fbab07d8a4cd 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -245,7 +245,7 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceAcceleratorName: "inferentia", v1beta1.LabelInstanceAcceleratorManufacturer: "aws", v1beta1.LabelInstanceAcceleratorCount: "1", - v1beta1.LabelInstanceAvailabilityZoneID: "", + v1beta1.LabelInstanceAvailabilityZoneID: "tstz1-1a", // Deprecated Labels v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", @@ -297,7 +297,7 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceGPUCount: "1", v1beta1.LabelInstanceGPUMemory: "16384", v1beta1.LabelInstanceLocalNVME: "900", - v1beta1.LabelInstanceAvailabilityZoneID: "", + v1beta1.LabelInstanceAvailabilityZoneID: 
"tstz1-1a", // Deprecated Labels v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", @@ -348,7 +348,7 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceAcceleratorName: "inferentia", v1beta1.LabelInstanceAcceleratorManufacturer: "aws", v1beta1.LabelInstanceAcceleratorCount: "1", - v1beta1.LabelInstanceAvailabilityZoneID: "", + v1beta1.LabelInstanceAvailabilityZoneID: "tstz1-1a", // Deprecated Labels v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", diff --git a/pkg/providers/subnet/subnet.go b/pkg/providers/subnet/subnet.go index 65b21e272320..42bd53b821aa 100644 --- a/pkg/providers/subnet/subnet.go +++ b/pkg/providers/subnet/subnet.go @@ -121,7 +121,7 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1beta1.EC2NodeCl if p.cm.HasChanged(fmt.Sprintf("subnets/%s", nodeClass.Name), subnets) { log.FromContext(ctx). WithValues("subnets", lo.Map(lo.Values(subnets), func(s *ec2.Subnet, _ int) string { - return fmt.Sprintf("%s (%s)", aws.StringValue(s.SubnetId), aws.StringValue(s.AvailabilityZone)) + return fmt.Sprintf("%s (zone: %s, zone-id: %s)", aws.StringValue(s.SubnetId), aws.StringValue(s.AvailabilityZone), aws.StringValue(s.AvailabilityZoneId)) })). 
V(1).Info("discovered subnets") } From c37640900823caa41ebf70fcd4d4aebbdc63d9ee Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Tue, 21 May 2024 23:51:05 -0700 Subject: [PATCH 05/17] test: additional E2E tests --- test/pkg/environment/aws/expectations.go | 8 +++ test/suites/integration/scheduling_test.go | 84 ++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/test/pkg/environment/aws/expectations.go b/test/pkg/environment/aws/expectations.go index 43f717f492f4..651d5541c6de 100644 --- a/test/pkg/environment/aws/expectations.go +++ b/test/pkg/environment/aws/expectations.go @@ -218,6 +218,14 @@ func (env *Environment) GetZones() map[string]string { }) } +// GetZoneIDMapping returns map from zone to zone id +func (env *Environment) GetZoneIDMapping() map[string]string { + output := lo.Must(env.EC2API.DescribeAvailabilityZones(&ec2.DescribeAvailabilityZonesInput{})) + return lo.Associate(output.AvailabilityZones, func(zone *ec2.AvailabilityZone) (string, string) { + return lo.FromPtr(zone.ZoneName), lo.FromPtr(zone.ZoneId) + }) +} + // GetSubnets returns all subnets matching the label selector // mapped from AZ -> {subnet-ids...} func (env *Environment) GetSubnets(tags map[string]string) map[string][]string { diff --git a/test/suites/integration/scheduling_test.go b/test/suites/integration/scheduling_test.go index 067dac9cbc93..0c7a79581e74 100644 --- a/test/suites/integration/scheduling_test.go +++ b/test/suites/integration/scheduling_test.go @@ -16,6 +16,7 @@ package integration_test import ( "fmt" + "strconv" "time" "github.com/samber/lo" @@ -596,6 +597,89 @@ var _ = FDescribe("Scheduling", Ordered, ContinueOnFailure, func() { env.ExpectCreated(nodePool, nodeClass, pod) env.EventuallyExpectHealthy(pod) }) + + It("should provision a node for a pod with overlapping zone and zone-id requirements", func() { + type mapping struct { + zone string + zoneID string + } + subnetZones := 
lo.Keys(env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName})) + zones := lo.Filter(lo.MapToSlice(env.GetZoneIDMapping(), func(zone, zoneID string) mapping { + return mapping{zone, zoneID} + }), func(m mapping, _ int) bool { + return lo.Contains(subnetZones, m.zone) + }) + Expect(len(zones)).To(BeNumerically(">=", 3)) + pod := test.Pod(test.PodOptions{ + NodeRequirements: []v1.NodeSelectorRequirement{ + { + Key: v1.LabelTopologyZone, + Operator: v1.NodeSelectorOpIn, + Values: lo.Map(zones[0:2], func(m mapping, _ int) string { return m.zone }), + }, + { + Key: v1beta1.LabelInstanceAvailabilityZoneID, + Operator: v1.NodeSelectorOpIn, + Values: lo.Map(zones[1:3], func(m mapping, _ int) string { return m.zoneID }), + }, + }, + }) + env.ExpectCreated(nodePool, nodeClass, pod) + node := env.EventuallyExpectNodeCount("==", 1)[0] + Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(zones[1].zone)) + Expect(node.Labels[v1beta1.LabelInstanceAvailabilityZoneID]).To(Equal(zones[1].zoneID)) + }) + + It("should provision nodes for pods with zone-id requirements in the correct zone", func() { + const domainLabel = "domain-label" + test.ReplaceRequirements(nodePool, corev1beta1.NodeSelectorRequirementWithMinValues{ + NodeSelectorRequirement: v1.NodeSelectorRequirement{ + Key: domainLabel, + Operator: v1.NodeSelectorOpExists, + }, + }) + + type mapping struct { + zone string + zoneID string + } + subnetZones := lo.Keys(env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName})) + zones := lo.Filter(lo.MapToSlice(env.GetZoneIDMapping(), func(zone, zoneID string) mapping { + return mapping{zone, zoneID} + }), func(m mapping, _ int) bool { + return lo.Contains(subnetZones, m.zone) + }) + + pods := lo.Times(len(zones), func(index int) *v1.Pod { + return test.Pod(test.PodOptions{ + NodeRequirements: []v1.NodeSelectorRequirement{ + { + Key: domainLabel, + Operator: v1.NodeSelectorOpIn, + Values: []string{fmt.Sprintf("%d", index)}, + }, + { + Key: 
v1beta1.LabelInstanceAvailabilityZoneID, + Operator: v1.NodeSelectorOpIn, + Values: []string{zones[index].zoneID}, + }, + }, + }) + }) + env.ExpectCreated(nodePool, nodeClass) + for _, pod := range pods { + env.ExpectCreated(pod) + } + nodes := env.EventuallyExpectCreatedNodeCount("==", len(zones)) + for _, node := range nodes { + domainValue, ok := node.Labels[domainLabel] + Expect(ok).To(BeTrue()) + expectedIndex, err := strconv.Atoi(domainValue) + Expect(err).To(BeNil()) + Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(zones[expectedIndex].zone)) + Expect(node.Labels[v1beta1.LabelInstanceAvailabilityZoneID]).To(Equal(zones[expectedIndex].zoneID)) + } + }) }) }) From 45a0c55014c8a658f6ff95b26c336be003cc8354 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Wed, 22 May 2024 08:50:29 -0700 Subject: [PATCH 06/17] fix remaining tests --- hack/validation/labels.sh | 2 +- hack/validation/requirements.sh | 4 +- pkg/apis/crds/karpenter.sh_nodeclaims.yaml | 2 +- pkg/apis/crds/karpenter.sh_nodepools.yaml | 4 +- test/suites/integration/scheduling_test.go | 50 ++++++++++------------ 5 files changed, 29 insertions(+), 33 deletions(-) diff --git a/hack/validation/labels.sh b/hack/validation/labels.sh index 1fb9c6293b32..f3b7990bc6d7 100755 --- a/hack/validation/labels.sh +++ b/hack/validation/labels.sh @@ -4,4 +4,4 @@ # ## checking for restricted labels while filtering out well known labels yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.metadata.properties.labels.x-kubernetes-validations += [ - {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self.all(x, x in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", 
\"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\"))"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml \ No newline at end of file + {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self.all(x, x in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\"))"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml \ No newline at end of file diff --git a/hack/validation/requirements.sh b/hack/validation/requirements.sh index 763d359fab16..3a74bb0d3962 100755 --- a/hack/validation/requirements.sh +++ b/hack/validation/requirements.sh @@ -4,9 +4,9 @@ ## checking for restricted labels while filtering out well 
known labels yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [ - {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml + {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", 
\"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml # # Adding validation for nodepool # ## checking for restricted labels while filtering out well known labels yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [ - {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml + {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", 
\"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml index 19bdcc94f335..63dee5756d69 100644 --- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +++ b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml @@ -220,7 +220,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in 
["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index 582154bc2e33..20e234581e45 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -190,7 +190,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self.all(x, x != "kubernetes.io/hostname") - message: label domain "karpenter.k8s.aws" is restricted - rule: self.all(x, x in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", 
"karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) + rule: self.all(x, x in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) type: object spec: description: NodeClaimSpec describes the desired state of the NodeClaim @@ -348,7 +348,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", 
"karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time diff --git a/test/suites/integration/scheduling_test.go b/test/suites/integration/scheduling_test.go index 0c7a79581e74..baec7b14862e 100644 --- a/test/suites/integration/scheduling_test.go +++ b/test/suites/integration/scheduling_test.go @@ -16,7 +16,6 @@ package integration_test import ( "fmt" - "strconv" "time" "github.com/samber/lo" @@ -38,7 +37,7 @@ import ( . 
"github.com/onsi/gomega" ) -var _ = FDescribe("Scheduling", Ordered, ContinueOnFailure, func() { +var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { var selectors sets.Set[string] BeforeEach(func() { @@ -116,7 +115,12 @@ var _ = FDescribe("Scheduling", Ordered, ContinueOnFailure, func() { { Key: v1beta1.LabelInstanceAvailabilityZoneID, Operator: v1.NodeSelectorOpIn, - Values: []string{"usw2-az3"}, + Values: func() []string { + zoneIDMapping := env.GetZoneIDMapping() + subnetZones := lo.Keys(env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName})) + targetZoneID := zoneIDMapping[subnetZones[0]] + return []string{targetZoneID} + }(), }, }, }}) @@ -610,6 +614,9 @@ var _ = FDescribe("Scheduling", Ordered, ContinueOnFailure, func() { return lo.Contains(subnetZones, m.zone) }) Expect(len(zones)).To(BeNumerically(">=", 3)) + + // Create a pod with 'overlapping' zone and zone-id requirements. With two options for each label, but only one pair of zone-zoneID that maps to the + // same AZ, we will always expect the pod to be scheduled to that AZ. In this case, this is the mapping at zone[1]. 
pod := test.Pod(test.PodOptions{ NodeRequirements: []v1.NodeSelectorRequirement{ { @@ -629,55 +636,44 @@ var _ = FDescribe("Scheduling", Ordered, ContinueOnFailure, func() { Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(zones[1].zone)) Expect(node.Labels[v1beta1.LabelInstanceAvailabilityZoneID]).To(Equal(zones[1].zoneID)) }) - It("should provision nodes for pods with zone-id requirements in the correct zone", func() { - const domainLabel = "domain-label" + const expectedZoneLabel = "domain-label" test.ReplaceRequirements(nodePool, corev1beta1.NodeSelectorRequirementWithMinValues{ NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: domainLabel, + Key: expectedZoneLabel, Operator: v1.NodeSelectorOpExists, }, }) - type mapping struct { - zone string - zoneID string - } - subnetZones := lo.Keys(env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName})) - zones := lo.Filter(lo.MapToSlice(env.GetZoneIDMapping(), func(zone, zoneID string) mapping { - return mapping{zone, zoneID} - }), func(m mapping, _ int) bool { - return lo.Contains(subnetZones, m.zone) - }) - - pods := lo.Times(len(zones), func(index int) *v1.Pod { + // zoneIDMapping is a mapping from zone to zone-id for all zoneIDMapping available in the provided subnets + zoneIDMapping := lo.PickByKeys(env.GetZoneIDMapping(), lo.Keys(env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName}))) + pods := lo.MapToSlice(zoneIDMapping, func(zone, zoneID string) *v1.Pod { return test.Pod(test.PodOptions{ NodeRequirements: []v1.NodeSelectorRequirement{ { - Key: domainLabel, + Key: expectedZoneLabel, Operator: v1.NodeSelectorOpIn, - Values: []string{fmt.Sprintf("%d", index)}, + Values: []string{zone}, }, { Key: v1beta1.LabelInstanceAvailabilityZoneID, Operator: v1.NodeSelectorOpIn, - Values: []string{zones[index].zoneID}, + Values: []string{zoneID}, }, }, }) }) + env.ExpectCreated(nodePool, nodeClass) for _, pod := range pods { env.ExpectCreated(pod) } - nodes := 
env.EventuallyExpectCreatedNodeCount("==", len(zones)) + nodes := env.EventuallyExpectCreatedNodeCount("==", len(zoneIDMapping)) for _, node := range nodes { - domainValue, ok := node.Labels[domainLabel] + expectedZone, ok := node.Labels[expectedZoneLabel] Expect(ok).To(BeTrue()) - expectedIndex, err := strconv.Atoi(domainValue) - Expect(err).To(BeNil()) - Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(zones[expectedIndex].zone)) - Expect(node.Labels[v1beta1.LabelInstanceAvailabilityZoneID]).To(Equal(zones[expectedIndex].zoneID)) + Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(expectedZone)) + Expect(node.Labels[v1beta1.LabelInstanceAvailabilityZoneID]).To(Equal(zoneIDMapping[expectedZone])) } }) }) From d61bb7b4bc9ecf9415b7bb3742cbe9843cd83376 Mon Sep 17 00:00:00 2001 From: Reed Schalo Date: Wed, 22 May 2024 15:43:04 -0700 Subject: [PATCH 07/17] remove helper function refs --- go.mod | 2 +- go.sum | 2 -- pkg/apis/v1beta1/labels.go | 4 ++-- pkg/cloudprovider/cloudprovider.go | 2 +- pkg/cloudprovider/suite_test.go | 2 +- .../providers/instancetype/suite_test.go | 3 ++- pkg/providers/instance/instance.go | 16 +++++++-------- pkg/providers/instancetype/instancetype.go | 2 +- pkg/providers/instancetype/suite_test.go | 20 +++++++++---------- pkg/providers/instancetype/types.go | 8 ++++---- pkg/providers/subnet/subnet.go | 4 ++-- test/suites/integration/scheduling_test.go | 12 +++++------ 12 files changed, 37 insertions(+), 40 deletions(-) diff --git a/go.mod b/go.mod index 5b5115bca4aa..21a342ec861a 100644 --- a/go.mod +++ b/go.mod @@ -117,4 +117,4 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) -replace sigs.k8s.io/karpenter => github.com/rschalo/karpenter v0.0.0-20240521231044-957333ae5ef7 +replace sigs.k8s.io/karpenter => /Users/schalor/Documents/github.com/rschalo/karpenter diff --git a/go.sum b/go.sum index ced4dcd44f31..c7ad2ee991b4 100644 --- a/go.sum +++ b/go.sum @@ -331,8 +331,6 @@ 
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/rschalo/karpenter v0.0.0-20240521231044-957333ae5ef7 h1:4akrWjwsoM0EH2dtF8UklHnBa6IIE1+6icY+WMt7Rr4= -github.com/rschalo/karpenter v0.0.0-20240521231044-957333ae5ef7/go.mod h1:5XYrIz9Bi7HgQyaUsx7O08ft+TJjrH+htlnPq8Sz9J8= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA= github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= diff --git a/pkg/apis/v1beta1/labels.go b/pkg/apis/v1beta1/labels.go index e9e27bffd015..ac72ab48b908 100644 --- a/pkg/apis/v1beta1/labels.go +++ b/pkg/apis/v1beta1/labels.go @@ -46,7 +46,7 @@ func init() { LabelInstanceAcceleratorName, LabelInstanceAcceleratorManufacturer, LabelInstanceAcceleratorCount, - LabelInstanceAvailabilityZoneID, + LabelTopologyZoneID, v1.LabelWindowsBuild, ) } @@ -95,7 +95,7 @@ var ( LabelNodeClass = Group + "/ec2nodeclass" - LabelInstanceAvailabilityZoneID = "topology.k8s.aws/zone-id" + LabelTopologyZoneID = "topology.k8s.aws/zone-id" LabelInstanceHypervisor = Group + "/instance-hypervisor" LabelInstanceEncryptionInTransitSupported = Group + "/instance-encryption-in-transit-supported" diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index c9d561d705ae..b9bf258343b2 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -316,7 +316,7 @@ func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType * nodeClaim.Status.Allocatable = 
functional.FilterMap(instanceType.Allocatable(), resourceFilter) } labels[v1.LabelTopologyZone] = i.Zone - labels[v1beta1.LabelInstanceAvailabilityZoneID] = c.subnetProvider.ZoneInfo()[i.Zone] + labels[v1beta1.LabelTopologyZoneID] = c.subnetProvider.ZoneInfo()[i.Zone] labels[corev1beta1.CapacityTypeLabelKey] = i.CapacityType if v, ok := i.Tags[corev1beta1.NodePoolLabelKey]; ok { labels[corev1beta1.NodePoolLabelKey] = v diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 87e2fc7aabb1..631e77ec89d2 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -215,7 +215,7 @@ var _ = Describe("CloudProvider", func() { Expect(cloudProviderNodeClaim).ToNot(BeNil()) zone, ok := cloudProviderNodeClaim.GetLabels()[v1.LabelTopologyZone] Expect(ok).To(BeTrue()) - zoneID, ok := cloudProviderNodeClaim.GetLabels()[v1beta1.LabelInstanceAvailabilityZoneID] + zoneID, ok := cloudProviderNodeClaim.GetLabels()[v1beta1.LabelTopologyZoneID] Expect(ok).To(BeTrue()) Expect(zoneID).To(Equal(awsEnv.SubnetProvider.ZoneInfo()[zone])) }) diff --git a/pkg/controllers/providers/instancetype/suite_test.go b/pkg/controllers/providers/instancetype/suite_test.go index 5b0fba5fc2a1..4a156e5d9ae6 100644 --- a/pkg/controllers/providers/instancetype/suite_test.go +++ b/pkg/controllers/providers/instancetype/suite_test.go @@ -18,6 +18,7 @@ import ( "context" "testing" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" @@ -150,7 +151,7 @@ var _ = Describe("InstanceType", func() { }) Expect(found).To(BeTrue()) for y := range instanceTypes[x].Offerings { - Expect(instanceTypes[x].Offerings[y].Zone()).To(Equal(lo.FromPtr(offering.Location))) + Expect(instanceTypes[x].Offerings[y].Requirements.Get(v1.LabelTopologyZone).Any()).To(Equal(lo.FromPtr(offering.Location))) } } }) diff --git a/pkg/providers/instance/instance.go 
b/pkg/providers/instance/instance.go index c00c0f9ff96a..b50ff3abce07 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -338,13 +338,10 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy } var overrides []*ec2.FleetLaunchTemplateOverridesRequest for _, offering := range unwrappedOfferings { - if capacityType != offering.CapacityType() { + if reqs.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) != nil { continue } - if !reqs.Get(v1.LabelTopologyZone).Has(offering.Zone()) || !reqs.Get(v1beta1.LabelInstanceAvailabilityZoneID).Has(offering.Requirements.Get(v1beta1.LabelInstanceAvailabilityZoneID).Values()[0]) { - continue - } - subnet, ok := zonalSubnets[offering.Zone()] + subnet, ok := zonalSubnets[offering.Requirements.Get(v1.LabelTopologyZone).Any()] if !ok { continue } @@ -377,7 +374,8 @@ func (p *DefaultProvider) getCapacityType(nodeClaim *corev1beta1.NodeClaim, inst if requirements.Get(corev1beta1.CapacityTypeLabelKey).Has(corev1beta1.CapacityTypeSpot) { for _, instanceType := range instanceTypes { for _, offering := range instanceType.Offerings.Available() { - if requirements.Get(v1.LabelTopologyZone).Has(offering.Zone()) && offering.CapacityType() == corev1beta1.CapacityTypeSpot { + if requirements.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) == nil && + offering.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() == corev1beta1.CapacityTypeSpot { return corev1beta1.CapacityTypeSpot } } @@ -412,10 +410,10 @@ func (p *DefaultProvider) isMixedCapacityLaunch(nodeClaim *corev1beta1.NodeClaim if requirements.Get(corev1beta1.CapacityTypeLabelKey).Has(corev1beta1.CapacityTypeSpot) { for _, instanceType := range instanceTypes { for _, offering := range instanceType.Offerings.Available() { - if !requirements.Get(v1.LabelTopologyZone).Has(offering.Zone()) || 
!requirements.Get(v1beta1.LabelInstanceAvailabilityZoneID).Has(offering.Requirements.Get(v1beta1.LabelInstanceAvailabilityZoneID).Values()[0]) { + if requirements.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) != nil { continue } - if offering.CapacityType() == corev1beta1.CapacityTypeSpot { + if offering.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() == corev1beta1.CapacityTypeSpot { hasSpotOfferings = true } else { hasODOffering = true @@ -433,7 +431,7 @@ func filterUnwantedSpot(instanceTypes []*cloudprovider.InstanceType) []*cloudpro // first, find the price of our cheapest available on-demand instance type that could support this node for _, it := range instanceTypes { for _, o := range it.Offerings.Available() { - if o.CapacityType() == corev1beta1.CapacityTypeOnDemand && o.Price < cheapestOnDemand { + if o.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() == corev1beta1.CapacityTypeOnDemand && o.Price < cheapestOnDemand { cheapestOnDemand = o.Price } } diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 277b448b00d8..9fc2795b5e25 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -279,7 +279,7 @@ func (p *DefaultProvider) createOfferings(ctx context.Context, instanceType *ec2 Requirements: scheduling.NewRequirements( scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, zone), - scheduling.NewRequirement(v1beta1.LabelInstanceAvailabilityZoneID, v1.NodeSelectorOpIn, zoneInfo[zone]), + scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, zoneInfo[zone]), ), Price: price, Available: available, diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index fbab07d8a4cd..185716c441a9 100644 --- a/pkg/providers/instancetype/suite_test.go 
+++ b/pkg/providers/instancetype/suite_test.go @@ -245,7 +245,7 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceAcceleratorName: "inferentia", v1beta1.LabelInstanceAcceleratorManufacturer: "aws", v1beta1.LabelInstanceAcceleratorCount: "1", - v1beta1.LabelInstanceAvailabilityZoneID: "tstz1-1a", + v1beta1.LabelTopologyZoneID: "tstz1-1a", // Deprecated Labels v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", @@ -297,7 +297,7 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceGPUCount: "1", v1beta1.LabelInstanceGPUMemory: "16384", v1beta1.LabelInstanceLocalNVME: "900", - v1beta1.LabelInstanceAvailabilityZoneID: "tstz1-1a", + v1beta1.LabelTopologyZoneID: "tstz1-1a", // Deprecated Labels v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", @@ -348,7 +348,7 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceAcceleratorName: "inferentia", v1beta1.LabelInstanceAcceleratorManufacturer: "aws", v1beta1.LabelInstanceAcceleratorCount: "1", - v1beta1.LabelInstanceAvailabilityZoneID: "tstz1-1a", + v1beta1.LabelTopologyZoneID: "tstz1-1a", // Deprecated Labels v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", @@ -881,8 +881,8 @@ var _ = Describe("InstanceTypeProvider", func() { for _, of := range it.Offerings { metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_available", map[string]string{ "instance_type": it.Name, - "capacity_type": of.CapacityType(), - "zone": of.Zone(), + "capacity_type": of.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any(), + "zone": of.Requirements.Get(v1.LabelTopologyZone).Any(), }) Expect(ok).To(BeTrue()) Expect(metric).To(Not(BeNil())) @@ -899,8 +899,8 @@ var _ = Describe("InstanceTypeProvider", func() { for _, of := range it.Offerings { metric, ok := 
FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_price_estimate", map[string]string{ "instance_type": it.Name, - "capacity_type": of.CapacityType(), - "zone": of.Zone(), + "capacity_type": of.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any(), + "zone": of.Requirements.Get(v1.LabelTopologyZone).Any(), }) Expect(ok).To(BeTrue()) Expect(metric).To(Not(BeNil())) @@ -2389,16 +2389,16 @@ func generateSpotPricing(cp *cloudprovider.CloudProvider, nodePool *corev1beta1. instanceType := it onDemandPrice := 1.00 for _, o := range it.Offerings { - if o.CapacityType() == corev1beta1.CapacityTypeOnDemand { + if o.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() == corev1beta1.CapacityTypeOnDemand { onDemandPrice = o.Price } } for _, o := range instanceType.Offerings { o := o - if o.CapacityType() != corev1beta1.CapacityTypeSpot { + if o.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() != corev1beta1.CapacityTypeSpot { continue } - zone := o.Zone() + zone := o.Requirements.Get(v1.LabelTopologyZone).Any() spotPrice := fmt.Sprintf("%0.3f", onDemandPrice*0.5) rsp.SpotPriceHistory = append(rsp.SpotPriceHistory, &ec2.SpotPrice{ AvailabilityZone: &zone, diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index be68681af08d..b06911d7b71c 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -79,13 +79,13 @@ func computeRequirements(info *ec2.InstanceTypeInfo, offerings cloudprovider.Off scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, getArchitecture(info)), scheduling.NewRequirement(v1.LabelOSStable, v1.NodeSelectorOpIn, getOS(info, amiFamily)...), scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { - return o.Zone() + return o.Requirements.Get(v1.LabelTopologyZone).Any() })...), scheduling.NewRequirement(v1.LabelTopologyRegion, v1.NodeSelectorOpIn, 
region), scheduling.NewRequirement(v1.LabelWindowsBuild, v1.NodeSelectorOpDoesNotExist), // Well Known to Karpenter scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { - return o.CapacityType() + return o.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() })...), // Well Known to AWS scheduling.NewRequirement(v1beta1.LabelInstanceCPU, v1.NodeSelectorOpIn, fmt.Sprint(aws.Int64Value(info.VCpuInfo.DefaultVCpus))), @@ -107,8 +107,8 @@ func computeRequirements(info *ec2.InstanceTypeInfo, offerings cloudprovider.Off scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpDoesNotExist), scheduling.NewRequirement(v1beta1.LabelInstanceHypervisor, v1.NodeSelectorOpIn, aws.StringValue(info.Hypervisor)), scheduling.NewRequirement(v1beta1.LabelInstanceEncryptionInTransitSupported, v1.NodeSelectorOpIn, fmt.Sprint(aws.BoolValue(info.NetworkInfo.EncryptionInTransitSupported))), - scheduling.NewRequirement(v1beta1.LabelInstanceAvailabilityZoneID, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { - return o.Requirements.Get(v1beta1.LabelInstanceAvailabilityZoneID).Values()[0] + scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { + return o.Requirements.Get(v1beta1.LabelTopologyZoneID).Any() })...), ) // Instance Type Labels diff --git a/pkg/providers/subnet/subnet.go b/pkg/providers/subnet/subnet.go index 42bd53b821aa..688498c85c85 100644 --- a/pkg/providers/subnet/subnet.go +++ b/pkg/providers/subnet/subnet.go @@ -180,7 +180,7 @@ func (p *DefaultProvider) ZonalSubnetsForLaunch(ctx context.Context, nodeClass * reqs := scheduling.NewRequirements( scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), scheduling.NewRequirement(v1.LabelTopologyZone, 
v1.NodeSelectorOpIn, subnet.Zone), - scheduling.NewRequirement(v1beta1.LabelInstanceAvailabilityZoneID, v1.NodeSelectorOpIn, subnet.ZoneID), + scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, subnet.ZoneID), ) predictedIPsUsed := p.minPods(instanceTypes, reqs) prevIPs := subnet.AvailableIPAddressCount @@ -247,7 +247,7 @@ func (p *DefaultProvider) UpdateInflightIPs(createFleetInput *ec2.CreateFleetInp reqs := scheduling.NewRequirements( scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, originalSubnet.Zone), - scheduling.NewRequirement(v1beta1.LabelInstanceAvailabilityZoneID, v1.NodeSelectorOpIn, originalSubnet.ZoneID), + scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, originalSubnet.ZoneID), ) minPods := p.minPods(instanceTypes, reqs) p.inflightIPs[originalSubnet.ID] = ips + minPods diff --git a/test/suites/integration/scheduling_test.go b/test/suites/integration/scheduling_test.go index baec7b14862e..9354b38b37fa 100644 --- a/test/suites/integration/scheduling_test.go +++ b/test/suites/integration/scheduling_test.go @@ -109,11 +109,11 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { env.ExpectCreatedNodeCount("==", 1) }) It("should support well-known labels for zone id selection", func() { - selectors.Insert(v1beta1.LabelInstanceAvailabilityZoneID) // Add node selector keys to selectors used in testing to ensure we test all labels + selectors.Insert(v1beta1.LabelTopologyZoneID) // Add node selector keys to selectors used in testing to ensure we test all labels deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ NodeRequirements: []v1.NodeSelectorRequirement{ { - Key: v1beta1.LabelInstanceAvailabilityZoneID, + Key: v1beta1.LabelTopologyZoneID, Operator: v1.NodeSelectorOpIn, Values: func() []string { zoneIDMapping := 
env.GetZoneIDMapping() @@ -625,7 +625,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { Values: lo.Map(zones[0:2], func(m mapping, _ int) string { return m.zone }), }, { - Key: v1beta1.LabelInstanceAvailabilityZoneID, + Key: v1beta1.LabelTopologyZoneID, Operator: v1.NodeSelectorOpIn, Values: lo.Map(zones[1:3], func(m mapping, _ int) string { return m.zoneID }), }, @@ -634,7 +634,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { env.ExpectCreated(nodePool, nodeClass, pod) node := env.EventuallyExpectNodeCount("==", 1)[0] Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(zones[1].zone)) - Expect(node.Labels[v1beta1.LabelInstanceAvailabilityZoneID]).To(Equal(zones[1].zoneID)) + Expect(node.Labels[v1beta1.LabelTopologyZoneID]).To(Equal(zones[1].zoneID)) }) It("should provision nodes for pods with zone-id requirements in the correct zone", func() { const expectedZoneLabel = "domain-label" @@ -656,7 +656,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { Values: []string{zone}, }, { - Key: v1beta1.LabelInstanceAvailabilityZoneID, + Key: v1beta1.LabelTopologyZoneID, Operator: v1.NodeSelectorOpIn, Values: []string{zoneID}, }, @@ -673,7 +673,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { expectedZone, ok := node.Labels[expectedZoneLabel] Expect(ok).To(BeTrue()) Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(expectedZone)) - Expect(node.Labels[v1beta1.LabelInstanceAvailabilityZoneID]).To(Equal(zoneIDMapping[expectedZone])) + Expect(node.Labels[v1beta1.LabelTopologyZoneID]).To(Equal(zoneIDMapping[expectedZone])) } }) }) From f458a442bc590c91c29facb6a9a78d26abb1f547 Mon Sep 17 00:00:00 2001 From: Reed Schalo Date: Wed, 22 May 2024 15:52:30 -0700 Subject: [PATCH 08/17] temp: pin to upstream PR --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 21a342ec861a..df70b24247b6 100644 --- a/go.mod +++ b/go.mod @@ -117,4 
+117,4 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) -replace sigs.k8s.io/karpenter => /Users/schalor/Documents/github.com/rschalo/karpenter +replace sigs.k8s.io/karpenter => github.com/rschalo/karpenter v0.0.0-20240522224509-25439a5a6d1e diff --git a/go.sum b/go.sum index c7ad2ee991b4..318a4752854d 100644 --- a/go.sum +++ b/go.sum @@ -331,6 +331,8 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/rschalo/karpenter v0.0.0-20240522224509-25439a5a6d1e h1:kYqGh4x5ofTVw89Sufi2LqvCk5tnAj7eBy7qYAuwQmY= +github.com/rschalo/karpenter v0.0.0-20240522224509-25439a5a6d1e/go.mod h1:5XYrIz9Bi7HgQyaUsx7O08ft+TJjrH+htlnPq8Sz9J8= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA= github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= From 739834ae1796b3ed662e8dde07506b00b75c5e70 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Wed, 22 May 2024 16:11:40 -0700 Subject: [PATCH 09/17] fix ct check --- pkg/providers/instance/instance.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index b50ff3abce07..1932982be07c 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -338,6 +338,9 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy } var overrides []*ec2.FleetLaunchTemplateOverridesRequest for _, offering := range unwrappedOfferings { + if capacityType != 
offering.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() { + continue + } if reqs.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) != nil { continue } From 7d10fc8b35eecb98e109703d84f8ebb720740e72 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Wed, 22 May 2024 16:51:33 -0700 Subject: [PATCH 10/17] subnet test update --- pkg/providers/subnet/suite_test.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pkg/providers/subnet/suite_test.go b/pkg/providers/subnet/suite_test.go index ac63b79e34ff..6d8eacaa123a 100644 --- a/pkg/providers/subnet/suite_test.go +++ b/pkg/providers/subnet/suite_test.go @@ -16,6 +16,7 @@ package subnet_test import ( "context" + "fmt" "sort" "sync" "testing" @@ -103,11 +104,12 @@ var _ = Describe("SubnetProvider", func() { } subnets, err := awsEnv.SubnetProvider.List(ctx, nodeClass) Expect(err).To(BeNil()) + fmt.Printf("subnets: %v\n", subnets) ExpectConsistsOfSubnets([]*ec2.Subnet{ { SubnetId: lo.ToPtr("subnet-test1"), AvailabilityZone: lo.ToPtr("test-zone-1a"), - AvailabilityZoneId: lo.ToPtr(""), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) @@ -127,11 +129,13 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test1"), AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, { SubnetId: lo.ToPtr("subnet-test2"), AvailabilityZone: lo.ToPtr("test-zone-1b"), + AvailabilityZoneId: lo.ToPtr("tstz1-1b"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) @@ -153,11 +157,13 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test1"), AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, { SubnetId: lo.ToPtr("subnet-test2"), AvailabilityZone: lo.ToPtr("test-zone-1b"), + AvailabilityZoneId: 
lo.ToPtr("tstz1-1b"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) @@ -174,6 +180,7 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test1"), AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) @@ -193,11 +200,13 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test1"), AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, { SubnetId: lo.ToPtr("subnet-test2"), AvailabilityZone: lo.ToPtr("test-zone-1b"), + AvailabilityZoneId: lo.ToPtr("tstz1-1b"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) @@ -215,6 +224,7 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test2"), AvailabilityZone: lo.ToPtr("test-zone-1b"), + AvailabilityZoneId: lo.ToPtr("tstz1-1b"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) @@ -412,6 +422,7 @@ func ExpectConsistsOfSubnets(expected, actual []*ec2.Subnet) { for _, elem := range expected { _, ok := lo.Find(actual, func(s *ec2.Subnet) bool { return lo.FromPtr(s.SubnetId) == lo.FromPtr(elem.SubnetId) && + lo.FromPtr(s.AvailabilityZoneId) == lo.FromPtr(elem.AvailabilityZoneId) && lo.FromPtr(s.AvailabilityZone) == lo.FromPtr(elem.AvailabilityZone) && lo.FromPtr(s.AvailableIpAddressCount) == lo.FromPtr(elem.AvailableIpAddressCount) }) From 314e7efa28de4ab0b659a74d98c78e4e3c802cd3 Mon Sep 17 00:00:00 2001 From: Reed Schalo Date: Wed, 22 May 2024 21:58:00 -0700 Subject: [PATCH 11/17] Update pkg/cloudprovider/suite_test.go Co-authored-by: Jonathan Innis --- pkg/cloudprovider/suite_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 631e77ec89d2..0b07ee26cdcd 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -211,7 
+211,7 @@ var _ = Describe("CloudProvider", func() { It("should return availability zone ID as a label on the nodeClaim", func() { ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim) cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim) - Expect(err).To(BeNil()) + Expect(err).ToNot(HaveOccurred()) Expect(cloudProviderNodeClaim).ToNot(BeNil()) zone, ok := cloudProviderNodeClaim.GetLabels()[v1.LabelTopologyZone] Expect(ok).To(BeTrue()) From b9b9c554a6c3752c4089274553dd0312fac158a2 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Thu, 23 May 2024 14:16:16 -0700 Subject: [PATCH 12/17] test: zone refactor --- test/pkg/environment/aws/environment.go | 15 +++++++ test/pkg/environment/aws/expectations.go | 26 ++++------- test/suites/drift/suite_test.go | 2 +- test/suites/integration/scheduling_test.go | 45 +++++++------------ test/suites/integration/subnet_test.go | 2 +- test/suites/localzone/suite_test.go | 7 ++- .../nodeclaim/garbage_collection_test.go | 2 +- 7 files changed, 48 insertions(+), 51 deletions(-) diff --git a/test/pkg/environment/aws/environment.go b/test/pkg/environment/aws/environment.go index e874cf325ce1..f3d9be256215 100644 --- a/test/pkg/environment/aws/environment.go +++ b/test/pkg/environment/aws/environment.go @@ -82,6 +82,13 @@ type Environment struct { ClusterEndpoint string InterruptionQueue string PrivateCluster bool + ZoneInfo []ZoneInfo +} + +type ZoneInfo struct { + Zone string + ZoneID string + ZoneType string } func NewEnvironment(t *testing.T) *Environment { @@ -123,6 +130,14 @@ func NewEnvironment(t *testing.T) *Environment { out := lo.Must(sqsapi.GetQueueUrlWithContext(env.Context, &servicesqs.GetQueueUrlInput{QueueName: aws.String(v)})) awsEnv.SQSProvider = lo.Must(sqs.NewDefaultProvider(sqsapi, lo.FromPtr(out.QueueUrl))) } + // Populate ZoneInfo for all AZs in the region + awsEnv.ZoneInfo = lo.Map(lo.Must(awsEnv.EC2API.DescribeAvailabilityZones(&ec2.DescribeAvailabilityZonesInput{})).AvailabilityZones, func(zone 
*ec2.AvailabilityZone, _ int) ZoneInfo { + return ZoneInfo{ + Zone: lo.FromPtr(zone.ZoneName), + ZoneID: lo.FromPtr(zone.ZoneId), + ZoneType: lo.FromPtr(zone.ZoneType), + } + }) return awsEnv } diff --git a/test/pkg/environment/aws/expectations.go b/test/pkg/environment/aws/expectations.go index 651d5541c6de..f87261716841 100644 --- a/test/pkg/environment/aws/expectations.go +++ b/test/pkg/environment/aws/expectations.go @@ -210,22 +210,6 @@ func (env *Environment) GetSpotInstanceRequest(id *string) *ec2.SpotInstanceRequ return siro.SpotInstanceRequests[0] } -// GetZones returns all available zones mapped from zone -> zone type -func (env *Environment) GetZones() map[string]string { - output := lo.Must(env.EC2API.DescribeAvailabilityZones(&ec2.DescribeAvailabilityZonesInput{})) - return lo.Associate(output.AvailabilityZones, func(zone *ec2.AvailabilityZone) (string, string) { - return lo.FromPtr(zone.ZoneName), lo.FromPtr(zone.ZoneType) - }) -} - -// GetZoneIDMapping returns map from zone to zone id -func (env *Environment) GetZoneIDMapping() map[string]string { - output := lo.Must(env.EC2API.DescribeAvailabilityZones(&ec2.DescribeAvailabilityZonesInput{})) - return lo.Associate(output.AvailabilityZones, func(zone *ec2.AvailabilityZone) (string, string) { - return lo.FromPtr(zone.ZoneName), lo.FromPtr(zone.ZoneId) - }) -} - // GetSubnets returns all subnets matching the label selector // mapped from AZ -> {subnet-ids...} func (env *Environment) GetSubnets(tags map[string]string) map[string][]string { @@ -251,10 +235,11 @@ func (env *Environment) GetSubnets(tags map[string]string) map[string][]string { type SubnetInfo struct { Name string ID string + ZoneInfo } -// GetSubnetNameAndIds returns all subnets matching the label selector -func (env *Environment) GetSubnetNameAndIds(tags map[string]string) []SubnetInfo { +// GetSubnetInfo returns all subnets matching the label selector +func (env *Environment) GetSubnetInfo(tags map[string]string) []SubnetInfo { var 
filters []*ec2.Filter for key, val := range tags { filters = append(filters, &ec2.Filter{ @@ -269,6 +254,11 @@ func (env *Environment) GetSubnetNameAndIds(tags map[string]string) []SubnetInfo if tag, ok := lo.Find(s.Tags, func(t *ec2.Tag) bool { return aws.StringValue(t.Key) == "Name" }); ok { elem.Name = aws.StringValue(tag.Value) } + if info, ok := lo.Find(env.ZoneInfo, func(info ZoneInfo) bool { + return aws.StringValue(s.AvailabilityZone) == info.Zone + }); ok { + elem.ZoneInfo = info + } return elem }) return true diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index dddd87542a3e..a910a009ab60 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -515,7 +515,7 @@ var _ = Describe("Drift", func() { env.EventuallyExpectHealthyPodCount(selector, numPods) }) It("should disrupt nodes that have drifted due to subnets", func() { - subnets := env.GetSubnetNameAndIds(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + subnets := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) Expect(len(subnets)).To(BeNumerically(">", 1)) nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{{ID: subnets[0].ID}} diff --git a/test/suites/integration/scheduling_test.go b/test/suites/integration/scheduling_test.go index 9354b38b37fa..165e8ec1fbed 100644 --- a/test/suites/integration/scheduling_test.go +++ b/test/suites/integration/scheduling_test.go @@ -115,12 +115,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { { Key: v1beta1.LabelTopologyZoneID, Operator: v1.NodeSelectorOpIn, - Values: func() []string { - zoneIDMapping := env.GetZoneIDMapping() - subnetZones := lo.Keys(env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName})) - targetZoneID := zoneIDMapping[subnetZones[0]] - return []string{targetZoneID} - }(), + Values: []string{env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": 
env.ClusterName})[0].ZoneInfo.ZoneID}, }, }, }}) @@ -603,17 +598,8 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { }) It("should provision a node for a pod with overlapping zone and zone-id requirements", func() { - type mapping struct { - zone string - zoneID string - } - subnetZones := lo.Keys(env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName})) - zones := lo.Filter(lo.MapToSlice(env.GetZoneIDMapping(), func(zone, zoneID string) mapping { - return mapping{zone, zoneID} - }), func(m mapping, _ int) bool { - return lo.Contains(subnetZones, m.zone) - }) - Expect(len(zones)).To(BeNumerically(">=", 3)) + subnetInfo := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + Expect(len(subnetInfo)).To(BeNumerically(">=", 3)) // Create a pod with 'overlapping' zone and zone-id requirements. With two options for each label, but only one pair of zone-zoneID that maps to the // same AZ, we will always expect the pod to be scheduled to that AZ. In this case, this is the mapping at zone[1]. 
@@ -622,19 +608,19 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { { Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, - Values: lo.Map(zones[0:2], func(m mapping, _ int) string { return m.zone }), + Values: lo.Map(subnetInfo[0:2], func(info aws.SubnetInfo, _ int) string { return info.Zone }), }, { Key: v1beta1.LabelTopologyZoneID, Operator: v1.NodeSelectorOpIn, - Values: lo.Map(zones[1:3], func(m mapping, _ int) string { return m.zoneID }), + Values: lo.Map(subnetInfo[1:3], func(info aws.SubnetInfo, _ int) string { return info.Zone }), }, }, }) env.ExpectCreated(nodePool, nodeClass, pod) node := env.EventuallyExpectNodeCount("==", 1)[0] - Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(zones[1].zone)) - Expect(node.Labels[v1beta1.LabelTopologyZoneID]).To(Equal(zones[1].zoneID)) + Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(subnetInfo[1].Zone)) + Expect(node.Labels[v1beta1.LabelTopologyZoneID]).To(Equal(subnetInfo[1].ZoneID)) }) It("should provision nodes for pods with zone-id requirements in the correct zone", func() { const expectedZoneLabel = "domain-label" @@ -645,20 +631,19 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { }, }) - // zoneIDMapping is a mapping from zone to zone-id for all zoneIDMapping available in the provided subnets - zoneIDMapping := lo.PickByKeys(env.GetZoneIDMapping(), lo.Keys(env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName}))) - pods := lo.MapToSlice(zoneIDMapping, func(zone, zoneID string) *v1.Pod { + subnetInfo := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + pods := lo.Map(subnetInfo, func(info aws.SubnetInfo, _ int) *v1.Pod { return test.Pod(test.PodOptions{ NodeRequirements: []v1.NodeSelectorRequirement{ { Key: expectedZoneLabel, Operator: v1.NodeSelectorOpIn, - Values: []string{zone}, + Values: []string{info.Zone}, }, { Key: v1beta1.LabelTopologyZoneID, Operator: v1.NodeSelectorOpIn, - Values: 
[]string{zoneID}, + Values: []string{info.ZoneID}, }, }, }) @@ -668,12 +653,16 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { for _, pod := range pods { env.ExpectCreated(pod) } - nodes := env.EventuallyExpectCreatedNodeCount("==", len(zoneIDMapping)) + nodes := env.EventuallyExpectCreatedNodeCount("==", len(subnetInfo)) for _, node := range nodes { expectedZone, ok := node.Labels[expectedZoneLabel] Expect(ok).To(BeTrue()) Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(expectedZone)) - Expect(node.Labels[v1beta1.LabelTopologyZoneID]).To(Equal(zoneIDMapping[expectedZone])) + zoneInfo, ok := lo.Find(subnetInfo, func(info aws.SubnetInfo) bool { + return info.Zone == expectedZone + }) + Expect(ok).To(BeTrue()) + Expect(node.Labels[v1beta1.LabelTopologyZoneID]).To(Equal(zoneInfo.ZoneID)) } }) }) diff --git a/test/suites/integration/subnet_test.go b/test/suites/integration/subnet_test.go index c3d53967b786..7eacc0ef28ec 100644 --- a/test/suites/integration/subnet_test.go +++ b/test/suites/integration/subnet_test.go @@ -78,7 +78,7 @@ var _ = Describe("Subnets", func() { }) It("should use the subnet tag selector with multiple tag values", func() { // Get all the subnets for the cluster - subnets := env.GetSubnetNameAndIds(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + subnets := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) Expect(len(subnets)).To(BeNumerically(">", 1)) firstSubnet := subnets[0] lastSubnet := subnets[len(subnets)-1] diff --git a/test/suites/localzone/suite_test.go b/test/suites/localzone/suite_test.go index 53b0d4940773..8c3ecc034b6c 100644 --- a/test/suites/localzone/suite_test.go +++ b/test/suites/localzone/suite_test.go @@ -70,8 +70,11 @@ var _ = BeforeEach(func() { NodeSelectorRequirement: v1.NodeSelectorRequirement{ Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, - Values: lo.Keys(lo.PickByValues(env.GetZones(), []string{"local-zone"})), - }}) + Values: 
lo.FilterMap(env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}), func(info aws.SubnetInfo, _ int) (string, bool) { + return info.Zone, info.ZoneType == "local-zone" + }), + }, + }) }) var _ = AfterEach(func() { env.Cleanup() }) var _ = AfterEach(func() { env.AfterEach() }) diff --git a/test/suites/nodeclaim/garbage_collection_test.go b/test/suites/nodeclaim/garbage_collection_test.go index 29b65fb9892e..0872116b00c4 100644 --- a/test/suites/nodeclaim/garbage_collection_test.go +++ b/test/suites/nodeclaim/garbage_collection_test.go @@ -45,7 +45,7 @@ var _ = Describe("GarbageCollection", func() { BeforeEach(func() { securityGroups := env.GetSecurityGroups(map[string]string{"karpenter.sh/discovery": env.ClusterName}) - subnets := env.GetSubnetNameAndIds(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + subnets := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) Expect(securityGroups).ToNot(HaveLen(0)) Expect(subnets).ToNot(HaveLen(0)) From 6a197d5ca333172ca1c3cb5b069949c2ca740fad Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Fri, 24 May 2024 00:01:27 -0700 Subject: [PATCH 13/17] remove subnet zone-id tracking --- pkg/cloudprovider/cloudprovider.go | 37 +++++++++++++++++++--- pkg/providers/instancetype/instancetype.go | 18 +++++++---- pkg/providers/instancetype/types.go | 11 +++++-- pkg/providers/subnet/subnet.go | 12 +++---- 4 files changed, 56 insertions(+), 22 deletions(-) diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index b9bf258343b2..13d65739aadb 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -106,7 +106,7 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *corev1beta1.NodeC instanceType, _ := lo.Find(instanceTypes, func(i *cloudprovider.InstanceType) bool { return i.Name == instance.Type }) - nc := c.instanceToNodeClaim(instance, instanceType) + nc := c.instanceToNodeClaim(instance, 
instanceType, nodeClass) nc.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, @@ -125,7 +125,11 @@ func (c *CloudProvider) List(ctx context.Context) ([]*corev1beta1.NodeClaim, err if err != nil { return nil, fmt.Errorf("resolving instance type, %w", err) } - nodeClaims = append(nodeClaims, c.instanceToNodeClaim(instance, instanceType)) + nc, err := c.resolveNodeClassFromInstance(ctx, instance) + if client.IgnoreNotFound(err) != nil { + return nil, fmt.Errorf("resolving nodeclass, %w", err) + } + nodeClaims = append(nodeClaims, c.instanceToNodeClaim(instance, instanceType, nc)) } return nodeClaims, nil } @@ -144,7 +148,11 @@ func (c *CloudProvider) Get(ctx context.Context, providerID string) (*corev1beta if err != nil { return nil, fmt.Errorf("resolving instance type, %w", err) } - return c.instanceToNodeClaim(instance, instanceType), nil + nc, err := c.resolveNodeClassFromInstance(ctx, instance) + if client.IgnoreNotFound(err) != nil { + return nil, fmt.Errorf("resolving nodeclass, %w", err) + } + return c.instanceToNodeClaim(instance, instanceType, nc), nil } func (c *CloudProvider) LivenessProbe(req *http.Request) error { @@ -279,6 +287,14 @@ func (c *CloudProvider) resolveInstanceTypeFromInstance(ctx context.Context, ins return instanceType, nil } +func (c *CloudProvider) resolveNodeClassFromInstance(ctx context.Context, instance *instance.Instance) (*v1beta1.EC2NodeClass, error) { + np, err := c.resolveNodePoolFromInstance(ctx, instance) + if err != nil { + return nil, fmt.Errorf("resolving nodepool, %w", err) + } + return c.resolveNodeClassFromNodePool(ctx, np) +} + func (c *CloudProvider) resolveNodePoolFromInstance(ctx context.Context, instance *instance.Instance) (*corev1beta1.NodePool, error) { if nodePoolName, ok := instance.Tags[corev1beta1.NodePoolLabelKey]; ok { nodePool := &corev1beta1.NodePool{} @@ -290,7 
+306,8 @@ func (c *CloudProvider) resolveNodePoolFromInstance(ctx context.Context, instanc return nil, errors.NewNotFound(schema.GroupResource{Group: corev1beta1.Group, Resource: "nodepools"}, "") } -func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType *cloudprovider.InstanceType) *corev1beta1.NodeClaim { +//nolint:gocyclo +func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType *cloudprovider.InstanceType, nodeClass *v1beta1.EC2NodeClass) *corev1beta1.NodeClaim { nodeClaim := &corev1beta1.NodeClaim{} labels := map[string]string{} annotations := map[string]string{} @@ -316,7 +333,17 @@ func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType * nodeClaim.Status.Allocatable = functional.FilterMap(instanceType.Allocatable(), resourceFilter) } labels[v1.LabelTopologyZone] = i.Zone - labels[v1beta1.LabelTopologyZoneID] = c.subnetProvider.ZoneInfo()[i.Zone] + // Attempt to resolve the zoneID from the instance's EC2NodeClass' status condition. + // If the EC2NodeClass is nil, we know we're in the List or Get paths, where we don't care about the zone-id value. + // If we're in the Create path, we've already validated the EC2NodeClass exists. In this case, we resolve the zone-id from the status condition + // both when creating offerings and when adding the label. 
+ if nodeClass != nil { + if subnet, ok := lo.Find(nodeClass.Status.Subnets, func(s v1beta1.Subnet) bool { + return s.Zone == i.Zone + }); ok && subnet.ZoneID != "" { + labels[v1beta1.LabelTopologyZoneID] = subnet.ZoneID + } + } labels[corev1beta1.CapacityTypeLabelKey] = i.CapacityType if v, ok := i.Tags[corev1beta1.NodePoolLabelKey]; ok { labels[corev1beta1.NodePoolLabelKey] = v diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 9fc2795b5e25..a40111129ddc 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -167,7 +167,7 @@ func (p *DefaultProvider) List(ctx context.Context, kc *corev1beta1.KubeletConfi return NewInstanceType(ctx, i, p.region, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, kc.MaxPods, kc.PodsPerCore, kc.KubeReserved, kc.SystemReserved, kc.EvictionHard, kc.EvictionSoft, - amiFamily, p.createOfferings(ctx, i, p.instanceTypeOfferings[aws.StringValue(i.InstanceType)], allZones, subnetZones, p.subnetProvider.ZoneInfo()), + amiFamily, p.createOfferings(ctx, i, allZones, p.instanceTypeOfferings[aws.StringValue(i.InstanceType)], nodeClass.Status.Subnets), ) }) p.instanceTypesCache.SetDefault(key, result) @@ -252,7 +252,7 @@ func (p *DefaultProvider) UpdateInstanceTypeOfferings(ctx context.Context) error return nil } -func (p *DefaultProvider) createOfferings(ctx context.Context, instanceType *ec2.InstanceTypeInfo, instanceTypeZones, zones, subnetZones sets.Set[string], zoneInfo map[string]string) []cloudprovider.Offering { +func (p *DefaultProvider) createOfferings(ctx context.Context, instanceType *ec2.InstanceTypeInfo, zones, instanceTypeZones sets.Set[string], subnets []v1beta1.Subnet) []cloudprovider.Offering { var offerings []cloudprovider.Offering for zone := range zones { // while usage classes should be a distinct set, there's no guarantee of that @@ -273,17 +273,23 @@ func (p *DefaultProvider) createOfferings(ctx 
context.Context, instanceType *ec2 log.FromContext(ctx).WithValues("capacity-type", capacityType, "instance-type", *instanceType.InstanceType).Error(fmt.Errorf("received unknown capacity type"), "failed parsing offering") continue } - available := !isUnavailable && ok && instanceTypeZones.Has(zone) && subnetZones.Has(zone) - offerings = append(offerings, cloudprovider.Offering{ + subnet, hasSubnet := lo.Find(subnets, func(s v1beta1.Subnet) bool { + return s.Zone == zone + }) + available := !isUnavailable && ok && instanceTypeZones.Has(zone) && hasSubnet + offering := cloudprovider.Offering{ Requirements: scheduling.NewRequirements( scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, zone), - scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, zoneInfo[zone]), ), Price: price, Available: available, - }) + } + if subnet.ZoneID != "" { + offering.Requirements.Add(scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, subnet.ZoneID)) + } + offerings = append(offerings, offering) instanceTypeOfferingAvailable.With(prometheus.Labels{ instanceTypeLabel: *instanceType.InstanceType, capacityTypeLabel: capacityType, diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index b06911d7b71c..f1c9d3a98ae7 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -107,10 +107,15 @@ func computeRequirements(info *ec2.InstanceTypeInfo, offerings cloudprovider.Off scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpDoesNotExist), scheduling.NewRequirement(v1beta1.LabelInstanceHypervisor, v1.NodeSelectorOpIn, aws.StringValue(info.Hypervisor)), scheduling.NewRequirement(v1beta1.LabelInstanceEncryptionInTransitSupported, v1.NodeSelectorOpIn, fmt.Sprint(aws.BoolValue(info.NetworkInfo.EncryptionInTransitSupported))), - 
scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { - return o.Requirements.Get(v1beta1.LabelTopologyZoneID).Any() - })...), ) + // Only add zone-id label when available in offerings. It may not be available if a user has upgraded from a + // previous version of Karpenter w/o zone-id support and the nodeclass subnet status has not yet updated. + if zoneIDs := lo.FilterMap(offerings.Available(), func(o cloudprovider.Offering, _ int) (string, bool) { + zoneID := o.Requirements.Get(v1beta1.LabelTopologyZoneID).Any() + return zoneID, zoneID == "" + }); len(zoneIDs) != 0 { + requirements.Add(scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, zoneIDs...)) + } // Instance Type Labels instanceFamilyParts := instanceTypeScheme.FindStringSubmatch(aws.StringValue(info.InstanceType)) if len(instanceFamilyParts) == 4 { diff --git a/pkg/providers/subnet/subnet.go b/pkg/providers/subnet/subnet.go index 688498c85c85..89cab3bca4a4 100644 --- a/pkg/providers/subnet/subnet.go +++ b/pkg/providers/subnet/subnet.go @@ -177,12 +177,10 @@ func (p *DefaultProvider) ZonalSubnetsForLaunch(ctx context.Context, nodeClass * } for _, subnet := range zonalSubnets { - reqs := scheduling.NewRequirements( + predictedIPsUsed := p.minPods(instanceTypes, scheduling.NewRequirements( scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, subnet.Zone), - scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, subnet.ZoneID), - ) - predictedIPsUsed := p.minPods(instanceTypes, reqs) + )) prevIPs := subnet.AvailableIPAddressCount if trackedIPs, ok := p.inflightIPs[subnet.ID]; ok { prevIPs = trackedIPs @@ -244,12 +242,10 @@ func (p *DefaultProvider) UpdateInflightIPs(createFleetInput *ec2.CreateFleetInp if originalSubnet.AvailableIPAddressCount == 
cachedIPAddressCount { // other IPs deducted were opportunistic and need to be readded since Fleet didn't pick those subnets to launch into if ips, ok := p.inflightIPs[originalSubnet.ID]; ok { - reqs := scheduling.NewRequirements( + minPods := p.minPods(instanceTypes, scheduling.NewRequirements( scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, originalSubnet.Zone), - scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, originalSubnet.ZoneID), - ) - minPods := p.minPods(instanceTypes, reqs) + )) p.inflightIPs[originalSubnet.ID] = ips + minPods } } From 758887b23bfc48e6a09974756dbff4ace9950d35 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Fri, 24 May 2024 00:26:04 -0700 Subject: [PATCH 14/17] remaining PR feedback --- pkg/cloudprovider/suite_test.go | 15 +++++++++------ pkg/providers/instance/instance.go | 15 ++++++--------- pkg/providers/subnet/subnet.go | 2 +- pkg/providers/subnet/suite_test.go | 1 - test/suites/integration/scheduling_test.go | 2 +- test/suites/nodeclaim/nodeclaim_test.go | 1 - 6 files changed, 17 insertions(+), 19 deletions(-) diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 0b07ee26cdcd..3e2858b45b33 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -134,16 +134,19 @@ var _ = Describe("CloudProvider", func() { }, Subnets: []v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, }, }, diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index 1932982be07c..39788c776321 100644 --- 
a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -299,9 +299,10 @@ func (p *DefaultProvider) getLaunchTemplateConfigs(ctx context.Context, nodeClas return nil, fmt.Errorf("getting launch templates, %w", err) } requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...) + requirements[corev1beta1.CapacityTypeLabelKey] = scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType) for _, launchTemplate := range launchTemplates { launchTemplateConfig := &ec2.FleetLaunchTemplateConfigRequest{ - Overrides: p.getOverrides(launchTemplate.InstanceTypes, zonalSubnets, requirements, capacityType, launchTemplate.ImageID), + Overrides: p.getOverrides(launchTemplate.InstanceTypes, zonalSubnets, requirements, launchTemplate.ImageID), LaunchTemplateSpecification: &ec2.FleetLaunchTemplateSpecificationRequest{ LaunchTemplateName: aws.String(launchTemplate.Name), Version: aws.String("$Latest"), @@ -319,7 +320,7 @@ func (p *DefaultProvider) getLaunchTemplateConfigs(ctx context.Context, nodeClas // getOverrides creates and returns launch template overrides for the cross product of InstanceTypes and subnets (with subnets being constrained by // zones and the offerings in InstanceTypes) -func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, reqs scheduling.Requirements, capacityType string, image string) []*ec2.FleetLaunchTemplateOverridesRequest { +func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, reqs scheduling.Requirements, image string) []*ec2.FleetLaunchTemplateOverridesRequest { // Unwrap all the offerings to a flat slice that includes a pointer // to the parent instance type name type offeringWithParentName struct { @@ -338,9 +339,6 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy } var overrides 
[]*ec2.FleetLaunchTemplateOverridesRequest for _, offering := range unwrappedOfferings { - if capacityType != offering.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() { - continue - } if reqs.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) != nil { continue } @@ -372,13 +370,12 @@ func (p *DefaultProvider) updateUnavailableOfferingsCache(ctx context.Context, e // available offering. The AWS Cloud Provider defaults to [ on-demand ], so spot // must be explicitly included in capacity type requirements. func (p *DefaultProvider) getCapacityType(nodeClaim *corev1beta1.NodeClaim, instanceTypes []*cloudprovider.InstanceType) string { - requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim. - Spec.Requirements...) + requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim. Spec.Requirements...) if requirements.Get(corev1beta1.CapacityTypeLabelKey).Has(corev1beta1.CapacityTypeSpot) { + requirements[corev1beta1.CapacityTypeLabelKey] = scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, corev1beta1.CapacityTypeSpot) for _, instanceType := range instanceTypes { for _, offering := range instanceType.Offerings.Available() { - if requirements.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) == nil && - offering.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() == corev1beta1.CapacityTypeSpot { + if requirements.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) == nil { return corev1beta1.CapacityTypeSpot } } diff --git a/pkg/providers/subnet/subnet.go b/pkg/providers/subnet/subnet.go index 89cab3bca4a4..bd94edb487c2 100644 --- a/pkg/providers/subnet/subnet.go +++ b/pkg/providers/subnet/subnet.go @@ -121,7 +121,7 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1beta1.EC2NodeCl if p.cm.HasChanged(fmt.Sprintf("subnets/%s", nodeClass.Name), subnets) { log.FromContext(ctx). 
WithValues("subnets", lo.Map(lo.Values(subnets), func(s *ec2.Subnet, _ int) string { - return fmt.Sprintf("%s (zone: %s, zone-id: %s)", aws.StringValue(s.SubnetId), aws.StringValue(s.AvailabilityZone), aws.StringValue(s.AvailabilityZoneId)) + return fmt.Sprintf(`"subnet": {"id": %q, "zone": %q, "zoneID": %q}`, aws.StringValue(s.SubnetId), aws.StringValue(s.AvailabilityZone), aws.StringValue(s.AvailabilityZoneId)) })). V(1).Info("discovered subnets") } diff --git a/pkg/providers/subnet/suite_test.go b/pkg/providers/subnet/suite_test.go index 6d8eacaa123a..1d534dd00edc 100644 --- a/pkg/providers/subnet/suite_test.go +++ b/pkg/providers/subnet/suite_test.go @@ -104,7 +104,6 @@ var _ = Describe("SubnetProvider", func() { } subnets, err := awsEnv.SubnetProvider.List(ctx, nodeClass) Expect(err).To(BeNil()) - fmt.Printf("subnets: %v\n", subnets) ExpectConsistsOfSubnets([]*ec2.Subnet{ { SubnetId: lo.ToPtr("subnet-test1"), diff --git a/test/suites/integration/scheduling_test.go b/test/suites/integration/scheduling_test.go index 165e8ec1fbed..0473c471bace 100644 --- a/test/suites/integration/scheduling_test.go +++ b/test/suites/integration/scheduling_test.go @@ -613,7 +613,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { { Key: v1beta1.LabelTopologyZoneID, Operator: v1.NodeSelectorOpIn, - Values: lo.Map(subnetInfo[1:3], func(info aws.SubnetInfo, _ int) string { return info.Zone }), + Values: lo.Map(subnetInfo[1:3], func(info aws.SubnetInfo, _ int) string { return info.ZoneID }), }, }, }) diff --git a/test/suites/nodeclaim/nodeclaim_test.go b/test/suites/nodeclaim/nodeclaim_test.go index a6f542416f7e..d4e81e4528cd 100644 --- a/test/suites/nodeclaim/nodeclaim_test.go +++ b/test/suites/nodeclaim/nodeclaim_test.go @@ -165,7 +165,6 @@ var _ = Describe("StandaloneNodeClaim", func() { node := env.EventuallyExpectInitializedNodeCount("==", 1)[0] Expect(node.Annotations).To(HaveKeyWithValue("custom-annotation", "custom-value")) 
Expect(node.Labels).To(HaveKeyWithValue("custom-label", "custom-value")) - Expect(node.Labels).To(HaveKey("topology.k8s.aws/zone-id")) Expect(node.Spec.Taints).To(ContainElements( v1.Taint{ Key: "custom-taint", From c9ccd6acc9bbe6632447a566ed7afe40eb972f83 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Fri, 24 May 2024 01:13:12 -0700 Subject: [PATCH 15/17] misc --- Makefile | 1 - cmd/controller/main.go | 1 - .../crds/karpenter.k8s.aws_ec2nodeclasses.yaml | 2 +- pkg/apis/v1beta1/ec2nodeclass_status.go | 2 +- pkg/cloudprovider/cloudprovider.go | 5 +---- pkg/cloudprovider/suite_test.go | 8 ++++++-- .../nodeclaim/garbagecollection/suite_test.go | 2 +- pkg/providers/instance/instance.go | 2 +- pkg/providers/instance/suite_test.go | 2 +- pkg/providers/instancetype/suite_test.go | 2 +- pkg/providers/instancetype/types.go | 2 +- pkg/providers/launchtemplate/suite_test.go | 2 +- pkg/providers/subnet/subnet.go | 12 ------------ pkg/providers/subnet/suite_test.go | 1 - pkg/test/nodeclass.go | 15 +++++++++------ test/suites/integration/scheduling_test.go | 8 ++++++-- 16 files changed, 30 insertions(+), 37 deletions(-) diff --git a/Makefile b/Makefile index 0fcda26e4c2d..1b355fd76f88 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,6 @@ run: ## Run Karpenter controller binary against your local cluster CLUSTER_NAME=${CLUSTER_NAME} \ INTERRUPTION_QUEUE=${CLUSTER_NAME} \ FEATURE_GATES="Drift=true" \ - LOG_LEVEL=debug \ go run ./cmd/controller/main.go test: ## Run tests diff --git a/cmd/controller/main.go b/cmd/controller/main.go index e922684a27cc..9d370a42694d 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -39,7 +39,6 @@ func main() { op.GetClient(), op.AMIProvider, op.SecurityGroupProvider, - op.SubnetProvider, ) lo.Must0(op.AddHealthzCheck("cloud-provider", awsCloudProvider.LivenessProbe)) cloudProvider := metrics.Decorate(awsCloudProvider) diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml 
b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 8db886dfb1dd..aec3d01d61d4 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -608,7 +608,7 @@ spec: zone: description: The associated availability zone type: string - zoneId: + zoneID: description: The associated availability zone ID type: string required: diff --git a/pkg/apis/v1beta1/ec2nodeclass_status.go b/pkg/apis/v1beta1/ec2nodeclass_status.go index a574152bff2c..50bc4b781879 100644 --- a/pkg/apis/v1beta1/ec2nodeclass_status.go +++ b/pkg/apis/v1beta1/ec2nodeclass_status.go @@ -29,7 +29,7 @@ type Subnet struct { Zone string `json:"zone"` // The associated availability zone ID // +optional - ZoneID string `json:"zoneId"` + ZoneID string `json:"zoneID"` } // SecurityGroup contains resolved SecurityGroup selector values utilized for node launch diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index 13d65739aadb..a4c0c83fff16 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -47,7 +47,6 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/instance" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" - "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" "sigs.k8s.io/karpenter/pkg/cloudprovider" ) @@ -62,18 +61,16 @@ type CloudProvider struct { instanceProvider instance.Provider amiProvider amifamily.Provider securityGroupProvider securitygroup.Provider - subnetProvider subnet.Provider } func New(instanceTypeProvider instancetype.Provider, instanceProvider instance.Provider, recorder events.Recorder, - kubeClient client.Client, amiProvider amifamily.Provider, securityGroupProvider securitygroup.Provider, subnetProvider subnet.Provider) *CloudProvider { + kubeClient client.Client, amiProvider amifamily.Provider, 
securityGroupProvider securitygroup.Provider) *CloudProvider { return &CloudProvider{ instanceTypeProvider: instanceTypeProvider, instanceProvider: instanceProvider, kubeClient: kubeClient, amiProvider: amiProvider, securityGroupProvider: securityGroupProvider, - subnetProvider: subnetProvider, recorder: recorder, } } diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 3e2858b45b33..f978a161f226 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -84,7 +84,7 @@ var _ = BeforeSuite(func() { fakeClock = clock.NewFakeClock(time.Now()) recorder = events.NewRecorder(&record.FakeRecorder{}) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, recorder, - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, recorder, cloudProvider, cluster) }) @@ -220,7 +220,11 @@ var _ = Describe("CloudProvider", func() { Expect(ok).To(BeTrue()) zoneID, ok := cloudProviderNodeClaim.GetLabels()[v1beta1.LabelTopologyZoneID] Expect(ok).To(BeTrue()) - Expect(zoneID).To(Equal(awsEnv.SubnetProvider.ZoneInfo()[zone])) + subnet, ok := lo.Find(nodeClass.Status.Subnets, func(s v1beta1.Subnet) bool { + return s.Zone == zone + }) + Expect(ok).To(BeTrue()) + Expect(zoneID).To(Equal(subnet.ZoneID)) }) It("should return NodeClass Hash on the nodeClaim", func() { ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim) diff --git a/pkg/controllers/nodeclaim/garbagecollection/suite_test.go b/pkg/controllers/nodeclaim/garbagecollection/suite_test.go index deb83b5e79aa..0446d774da30 100644 --- a/pkg/controllers/nodeclaim/garbagecollection/suite_test.go +++ b/pkg/controllers/nodeclaim/garbagecollection/suite_test.go @@ -65,7 +65,7 @@ var _ = BeforeSuite(func() { env = 
coretest.NewEnvironment(scheme.Scheme, coretest.WithCRDs(apis.CRDs...)) awsEnv = test.NewEnvironment(ctx, env) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) garbageCollectionController = garbagecollection.NewController(env.Client, cloudProvider) }) diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index 39788c776321..95c17cefb206 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -370,7 +370,7 @@ func (p *DefaultProvider) updateUnavailableOfferingsCache(ctx context.Context, e // available offering. The AWS Cloud Provider defaults to [ on-demand ], so spot // must be explicitly included in capacity type requirements. func (p *DefaultProvider) getCapacityType(nodeClaim *corev1beta1.NodeClaim, instanceTypes []*cloudprovider.InstanceType) string { - requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim. Spec.Requirements...) + requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...) 
if requirements.Get(corev1beta1.CapacityTypeLabelKey).Has(corev1beta1.CapacityTypeSpot) { requirements[corev1beta1.CapacityTypeLabelKey] = scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, corev1beta1.CapacityTypeSpot) for _, instanceType := range instanceTypes { diff --git a/pkg/providers/instance/suite_test.go b/pkg/providers/instance/suite_test.go index a4d146e7f35c..08da5f5053cf 100644 --- a/pkg/providers/instance/suite_test.go +++ b/pkg/providers/instance/suite_test.go @@ -64,7 +64,7 @@ var _ = BeforeSuite(func() { ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) }) var _ = AfterSuite(func() { diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 98ab15e344e1..3843909d641a 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -85,7 +85,7 @@ var _ = BeforeSuite(func() { awsEnv = test.NewEnvironment(ctx, env) fakeClock = &clock.FakeClock{} cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster) }) diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index 715e4187884f..a4482cf7715d 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -111,7 +111,7 
@@ func computeRequirements(info *ec2.InstanceTypeInfo, offerings cloudprovider.Off // previous version of Karpenter w/o zone-id support and the nodeclass subnet status has not yet updated. if zoneIDs := lo.FilterMap(offerings.Available(), func(o cloudprovider.Offering, _ int) (string, bool) { zoneID := o.Requirements.Get(v1beta1.LabelTopologyZoneID).Any() - return zoneID, zoneID == "" + return zoneID, zoneID != "" }); len(zoneIDs) != 0 { requirements.Add(scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, zoneIDs...)) } diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index dabe878d831b..d2a0d6382ffe 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -94,7 +94,7 @@ var _ = BeforeSuite(func() { fakeClock = &clock.FakeClock{} cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster) }) diff --git a/pkg/providers/subnet/subnet.go b/pkg/providers/subnet/subnet.go index bd94edb487c2..3cd3d3a7e88e 100644 --- a/pkg/providers/subnet/subnet.go +++ b/pkg/providers/subnet/subnet.go @@ -43,7 +43,6 @@ type Provider interface { AssociatePublicIPAddressValue(*v1beta1.EC2NodeClass) *bool ZonalSubnetsForLaunch(context.Context, *v1beta1.EC2NodeClass, []*cloudprovider.InstanceType, string) (map[string]*Subnet, error) UpdateInflightIPs(*ec2.CreateFleetInput, *ec2.CreateFleetOutput, []*cloudprovider.InstanceType, []*Subnet, string) - ZoneInfo() map[string]string } type DefaultProvider struct { @@ -54,7 +53,6 @@ type DefaultProvider struct { 
associatePublicIPAddressCache *cache.Cache cm *pretty.ChangeMonitor inflightIPs map[string]int64 - zoneInfo map[string]string } type Subnet struct { @@ -75,8 +73,6 @@ func NewDefaultProvider(ec2api ec2iface.EC2API, cache *cache.Cache, availableIPA associatePublicIPAddressCache: associatePublicIPAddressCache, // inflightIPs is used to track IPs from known launched instances inflightIPs: map[string]int64{}, - // availabilityZoneIDs is used to track possible az IDs based on discovered subnets - zoneInfo: map[string]string{}, } } @@ -104,7 +100,6 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1beta1.EC2NodeCl if err != nil { return nil, fmt.Errorf("describing subnets %s, %w", pretty.Concise(filters), err) } - availableZones := map[string]string{} for i := range output.Subnets { subnets[lo.FromPtr(output.Subnets[i].SubnetId)] = output.Subnets[i] p.availableIPAddressCache.SetDefault(lo.FromPtr(output.Subnets[i].SubnetId), lo.FromPtr(output.Subnets[i].AvailableIpAddressCount)) @@ -112,10 +107,7 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1beta1.EC2NodeCl // subnets can be leaked here, if a subnets is never called received from ec2 // we are accepting it for now, as this will be an insignificant amount of memory delete(p.inflightIPs, lo.FromPtr(output.Subnets[i].SubnetId)) // remove any previously tracked IP addresses since we just refreshed from EC2 - // add zone name to ID comparison as subnets are discovered - availableZones[aws.StringValue(output.Subnets[i].AvailabilityZone)] = aws.StringValue(output.Subnets[i].AvailabilityZoneId) } - p.zoneInfo = availableZones } p.cache.SetDefault(fmt.Sprint(hash), lo.Values(subnets)) if p.cm.HasChanged(fmt.Sprintf("subnets/%s", nodeClass.Name), subnets) { @@ -252,10 +244,6 @@ func (p *DefaultProvider) UpdateInflightIPs(createFleetInput *ec2.CreateFleetInp } } -func (p *DefaultProvider) ZoneInfo() map[string]string { - return p.zoneInfo -} - func (p *DefaultProvider) LivenessProbe(_ 
*http.Request) error { p.Lock() //nolint: staticcheck diff --git a/pkg/providers/subnet/suite_test.go b/pkg/providers/subnet/suite_test.go index 1d534dd00edc..4760e6ff7354 100644 --- a/pkg/providers/subnet/suite_test.go +++ b/pkg/providers/subnet/suite_test.go @@ -16,7 +16,6 @@ package subnet_test import ( "context" - "fmt" "sort" "sync" "testing" diff --git a/pkg/test/nodeclass.go b/pkg/test/nodeclass.go index df81e6640a95..13f457bdddea 100644 --- a/pkg/test/nodeclass.go +++ b/pkg/test/nodeclass.go @@ -105,16 +105,19 @@ func EC2NodeClass(overrides ...v1beta1.EC2NodeClass) *v1beta1.EC2NodeClass { } options.Status.Subnets = []v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, } } diff --git a/test/suites/integration/scheduling_test.go b/test/suites/integration/scheduling_test.go index 0bb9e77407bb..d755a65e731d 100644 --- a/test/suites/integration/scheduling_test.go +++ b/test/suites/integration/scheduling_test.go @@ -597,7 +597,9 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { }) It("should provision a node for a pod with overlapping zone and zone-id requirements", func() { - subnetInfo := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + subnetInfo := lo.UniqBy(env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}), func(s aws.SubnetInfo) string { + return s.Zone + }) Expect(len(subnetInfo)).To(BeNumerically(">=", 3)) // Create a pod with 'overlapping' zone and zone-id requirements. 
With two options for each label, but only one pair of zone-zoneID that maps to the @@ -630,7 +632,9 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { }, }) - subnetInfo := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + subnetInfo := lo.UniqBy(env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}), func(s aws.SubnetInfo) string { + return s.Zone + }) pods := lo.Map(subnetInfo, func(info aws.SubnetInfo, _ int) *v1.Pod { return test.Pod(test.PodOptions{ NodeRequirements: []v1.NodeSelectorRequirement{ From 6ce84f41372594134c1aed313bf890a9b2e1e3a3 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Fri, 24 May 2024 01:14:29 -0700 Subject: [PATCH 16/17] deps: pin to upstream --- go.mod | 4 +--- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index df70b24247b6..c47e5d3b6676 100644 --- a/go.mod +++ b/go.mod @@ -30,7 +30,7 @@ require ( k8s.io/utils v0.0.0-20240102154912-e7106e64919e knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd sigs.k8s.io/controller-runtime v0.18.2 - sigs.k8s.io/karpenter v0.36.1-0.20240521002315-9b145a6d85b4 + sigs.k8s.io/karpenter v0.36.1-0.20240524020535-a30f67aaf181 sigs.k8s.io/yaml v1.4.0 ) @@ -116,5 +116,3 @@ require ( sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) - -replace sigs.k8s.io/karpenter => github.com/rschalo/karpenter v0.0.0-20240522224509-25439a5a6d1e diff --git a/go.sum b/go.sum index 318a4752854d..1ae6cc110fd0 100644 --- a/go.sum +++ b/go.sum @@ -331,8 +331,6 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod 
h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/rschalo/karpenter v0.0.0-20240522224509-25439a5a6d1e h1:kYqGh4x5ofTVw89Sufi2LqvCk5tnAj7eBy7qYAuwQmY= -github.com/rschalo/karpenter v0.0.0-20240522224509-25439a5a6d1e/go.mod h1:5XYrIz9Bi7HgQyaUsx7O08ft+TJjrH+htlnPq8Sz9J8= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA= github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= @@ -763,6 +761,8 @@ sigs.k8s.io/controller-runtime v0.18.2 h1:RqVW6Kpeaji67CY5nPEfRz6ZfFMk0lWQlNrLql sigs.k8s.io/controller-runtime v0.18.2/go.mod h1:tuAt1+wbVsXIT8lPtk5RURxqAnq7xkpv2Mhttslg7Hw= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/karpenter v0.36.1-0.20240524020535-a30f67aaf181 h1:OQlVI9wqaV+VW8y13clzV/tM8sEgm0M/Fs/fVsrnRsY= +sigs.k8s.io/karpenter v0.36.1-0.20240524020535-a30f67aaf181/go.mod h1:5XYrIz9Bi7HgQyaUsx7O08ft+TJjrH+htlnPq8Sz9J8= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= From a5f78b16d765924f249199fd6fb6803e22584a6d Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Fri, 24 May 2024 11:21:32 -0700 Subject: [PATCH 17/17] remaining comments --- pkg/apis/v1beta1/ec2nodeclass_status.go | 2 +- pkg/providers/instancetype/instancetype.go | 9 +++++++++ pkg/providers/subnet/subnet.go | 11 +++++++---- test/suites/integration/scheduling_test.go | 7 ++++++- 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/pkg/apis/v1beta1/ec2nodeclass_status.go b/pkg/apis/v1beta1/ec2nodeclass_status.go index 
50bc4b781879..9510e5b0567b 100644 --- a/pkg/apis/v1beta1/ec2nodeclass_status.go +++ b/pkg/apis/v1beta1/ec2nodeclass_status.go @@ -29,7 +29,7 @@ type Subnet struct { Zone string `json:"zone"` // The associated availability zone ID // +optional - ZoneID string `json:"zoneID"` + ZoneID string `json:"zoneID,omitempty"` } // SecurityGroup contains resolved SecurityGroup selector values utilized for node launch diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index a40111129ddc..82bbcb099faa 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -252,6 +252,15 @@ func (p *DefaultProvider) UpdateInstanceTypeOfferings(ctx context.Context) error return nil } +// createOfferings creates a set of mutually exclusive offerings for a given instance type. This provider maintains an +// invariant that each offering is mutually exclusive. Specifically, there is an offering for each permutation of zone +// and capacity type. ZoneID is also injected into the offering requirements, when available, but there is a 1-1 +// mapping between zone and zoneID so this does not change the number of offerings. +// +// Each requirement on the offering is guaranteed to have a single value. 
To get the value for a requirement on an +// offering, you can do the following thanks to this invariant: +// +// offering.Requirements.Get(v1.TopologyLabelZone).Any() func (p *DefaultProvider) createOfferings(ctx context.Context, instanceType *ec2.InstanceTypeInfo, zones, instanceTypeZones sets.Set[string], subnets []v1beta1.Subnet) []cloudprovider.Offering { var offerings []cloudprovider.Offering for zone := range zones { diff --git a/pkg/providers/subnet/subnet.go b/pkg/providers/subnet/subnet.go index 3cd3d3a7e88e..95bba2532361 100644 --- a/pkg/providers/subnet/subnet.go +++ b/pkg/providers/subnet/subnet.go @@ -112,10 +112,13 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1beta1.EC2NodeCl p.cache.SetDefault(fmt.Sprint(hash), lo.Values(subnets)) if p.cm.HasChanged(fmt.Sprintf("subnets/%s", nodeClass.Name), subnets) { log.FromContext(ctx). - WithValues("subnets", lo.Map(lo.Values(subnets), func(s *ec2.Subnet, _ int) string { - return fmt.Sprintf(`"subnet": {"id": %q, "zone": %q, "zoneID": %q}`, aws.StringValue(s.SubnetId), aws.StringValue(s.AvailabilityZone), aws.StringValue(s.AvailabilityZoneId)) - })). 
- V(1).Info("discovered subnets") + WithValues("subnets", lo.Map(lo.Values(subnets), func(s *ec2.Subnet, _ int) v1beta1.Subnet { + return v1beta1.Subnet{ + ID: lo.FromPtr(s.SubnetId), + Zone: lo.FromPtr(s.AvailabilityZone), + ZoneID: lo.FromPtr(s.AvailabilityZoneId), + } + })).V(1).Info("discovered subnets") } return lo.Values(subnets), nil } diff --git a/test/suites/integration/scheduling_test.go b/test/suites/integration/scheduling_test.go index d755a65e731d..be328ed5cdd2 100644 --- a/test/suites/integration/scheduling_test.go +++ b/test/suites/integration/scheduling_test.go @@ -624,7 +624,12 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { Expect(node.Labels[v1beta1.LabelTopologyZoneID]).To(Equal(subnetInfo[1].ZoneID)) }) It("should provision nodes for pods with zone-id requirements in the correct zone", func() { - const expectedZoneLabel = "domain-label" + // Each pod specifies a requirement on this expected zone, where the value is the matching zone for the + // required zone-id. This allows us to verify that Karpenter launched the node in the correct zone, even if + // it doesn't add the zone-id label and the label is added by CCM. If we didn't take this approach, we would + // succeed even if Karpenter doesn't add the label and /or incorrectly generated offerings on k8s 1.30 and + // above. This is an unlikely scenario, and adding this check is a defense in depth measure. + const expectedZoneLabel = "expected-zone-label" test.ReplaceRequirements(nodePool, corev1beta1.NodeSelectorRequirementWithMinValues{ NodeSelectorRequirement: v1.NodeSelectorRequirement{ Key: expectedZoneLabel,