From 7304f30ee4f00a3a7d1d1a84106660e140b63da6 Mon Sep 17 00:00:00 2001
From: Jason Deal
Date: Sun, 24 Nov 2024 13:24:29 -0800
Subject: [PATCH] feat: only operate on managed resources (#7423)

---
 .github/workflows/ci.yaml                     |  1 +
 Makefile                                      |  4 +--
 .../templates/karpenter.sh_nodeclaims.yaml    | 11 ++++++-
 .../templates/karpenter.sh_nodepools.yaml     | 24 ++++++++++----
 go.mod                                        |  2 +-
 go.sum                                        |  4 +--
 hack/validation/labels.sh                     | 13 ++++----
 hack/validation/requirements.sh               | 20 +++++------
 pkg/apis/apis.go                              | 12 ++++---
 pkg/apis/crds/karpenter.sh_nodeclaims.yaml    | 11 ++++++-
 pkg/apis/crds/karpenter.sh_nodepools.yaml     | 24 ++++++++++----
 pkg/cloudprovider/cloudprovider.go            | 17 +++++++---
 pkg/cloudprovider/suite_test.go               |  2 +-
 pkg/controllers/controllers.go                |  4 +--
 pkg/controllers/interruption/controller.go    | 27 ++++++++++-----
 pkg/controllers/interruption/suite_test.go    | 11 +++++--
 .../nodeclaim/garbagecollection/controller.go | 33 ++++++++++---------
 pkg/controllers/nodeclass/hash/controller.go  | 13 ++++----
 pkg/controllers/nodeclass/hash/suite_test.go  |  2 +-
 .../nodeclass/status/suite_test.go            |  2 +-
 .../nodeclass/termination/controller.go       |  9 ++---
 .../nodeclass/termination/suite_test.go       |  2 +-
 .../instancetype/capacity/controller.go       | 10 ++++--
 .../instancetype/capacity/suite_test.go       | 25 +++++++++-----
 pkg/providers/instancetype/instancetype.go    |  1 -
 pkg/providers/instancetype/suite_test.go      | 11 +++++--
 pkg/providers/launchtemplate/suite_test.go    | 20 +++++------
 .../al2023_mime_userdata_merged.golden        |  2 +-
 .../al2023_shell_userdata_merged.golden       |  2 +-
 .../testdata/al2023_userdata_unmerged.golden  |  2 +-
 .../al2023_yaml_userdata_merged.golden        |  2 +-
 .../testdata/al2_userdata_merged.golden       |  2 +-
 .../testdata/al2_userdata_unmerged.golden     |  2 +-
 .../testdata/br_userdata_merged.golden        |  1 +
 .../testdata/br_userdata_unmerged.golden      |  1 +
 .../testdata/windows_userdata_merged.golden   |  2 +-
 .../testdata/windows_userdata_unmerged.golden |  2 +-
 pkg/test/environment.go                       |  2 ++
 pkg/test/nodeclass.go                         | 15 ---------
 39 files changed, 214 insertions(+), 136 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 282c841a77fd..c80540464fbb 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -17,3 +17,4 @@ jobs:
       - name: Enable the actionlint matcher
         run: echo "::add-matcher::.github/actionlint-matcher.json"
       - run: make ci-non-test
+        shell: bash
diff --git a/Makefile b/Makefile
index 0ea4e9247aa9..e47067ba0be4 100644
--- a/Makefile
+++ b/Makefile
@@ -104,8 +104,8 @@ verify: tidy download ## Verify code. Includes dependencies, linting, formatting
 	hack/boilerplate.sh
 	cp $(KARPENTER_CORE_DIR)/pkg/apis/crds/* pkg/apis/crds
 	hack/validation/kubelet.sh
-	hack/validation/requirements.sh
-	hack/validation/labels.sh
+	bash -c 'source ./hack/validation/requirements.sh && injectDomainRequirementRestrictions "karpenter.k8s.aws"'
+	bash -c 'source ./hack/validation/labels.sh && injectDomainLabelRestrictions "karpenter.k8s.aws"'
 	cp pkg/apis/crds/* charts/karpenter-crd/templates
 	hack/github/dependabot.sh
 	$(foreach dir,$(MOD_DIRS),cd $(dir) && golangci-lint run $(newline))
diff --git a/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml b/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml
index e85d305d9aa7..bfe259dea177 100644
--- a/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml
+++ b/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml
@@ -88,12 +88,21 @@ spec:
                 description: API version of the referent
                 pattern: ^[^/]*$
                 type: string
+                x-kubernetes-validations:
+                  - message: group may not be empty
+                    rule: self != ''
               kind:
                 description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"'
                 type: string
+                x-kubernetes-validations:
+                  - message: kind may not be empty
+                    rule: self != ''
               name:
                 description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names'
                 type: string
+                x-kubernetes-validations:
+                  - message: name may not be empty
+                    rule: self != ''
             required:
               - group
               - kind
@@ -121,7 +130,7 @@ spec:
                   - message: label "kubernetes.io/hostname" is restricted
                     rule: self != "kubernetes.io/hostname"
                   - message: label domain "karpenter.k8s.aws" is restricted
-                    rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
+                    rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
               minValues:
                 description: |-
                   This field is ALPHA and can be dropped or replaced at any time
diff --git a/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml b/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml
index ffe4932aad54..d53ecbb85b56 100644
--- a/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml
+++ b/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml
@@ -114,11 +114,9 @@ spec:
                     description: |-
                       Reasons is a list of disruption methods that this budget applies to. If Reasons is not set, this budget applies to all methods.
                       Otherwise, this will apply to each reason defined.
-                      allowed reasons are Underutilized, Empty, and Drifted and additional CloudProvider-specific reasons.
+                      allowed reasons are Underutilized, Empty, and Drifted.
                     items:
-                      description: |-
-                        DisruptionReason defines valid reasons for disruption budgets.
-                        CloudProviders will need to append to the list of enums when implementing cloud provider disruption reasons
+                      description: DisruptionReason defines valid reasons for disruption budgets.
                       enum:
                         - Underutilized
                         - Empty
@@ -209,7 +207,7 @@ spec:
                   - message: label "kubernetes.io/hostname" is restricted
                     rule: self.all(x, x != "kubernetes.io/hostname")
                   - message: label domain "karpenter.k8s.aws" is restricted
-                    rule: self.all(x, x in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws"))
+                    rule: self.all(x, x in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws"))
             type: object
           spec:
             description: |-
@@ -233,17 +231,31 @@ spec:
                     description: API version of the referent
                     pattern: ^[^/]*$
                     type: string
+                    x-kubernetes-validations:
+                      - message: group may not be empty
+                        rule: self != ''
                   kind:
                     description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"'
                     type: string
+                    x-kubernetes-validations:
+                      - message: kind may not be empty
+                        rule: self != ''
                   name:
                     description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names'
                     type: string
+                    x-kubernetes-validations:
+                      - message: name may not be empty
+                        rule: self != ''
                 required:
                   - group
                   - kind
                   - name
                 type: object
+                x-kubernetes-validations:
+                  - message: nodeClassRef.group is immutable
+                    rule: self.group == oldSelf.group
+                  - message: nodeClassRef.kind is immutable
+                    rule: self.kind == oldSelf.kind
               requirements:
                 description: Requirements are layered with GetLabels and applied to every node.
                 items:
@@ -268,7 +280,7 @@ spec:
                   - message: label "kubernetes.io/hostname" is restricted
                     rule: self != "kubernetes.io/hostname"
                   - message: label domain "karpenter.k8s.aws" is restricted
-                    rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
+                    rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
               minValues:
                 description: |-
                   This field is ALPHA and can be dropped or replaced at any time
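The nodeClassRef hunks above tighten the contract in two steps: group, kind, and name must be non-empty, and group/kind additionally become immutable on NodePools via CEL transition rules (oldSelf is only bound on UPDATE). A plain-Go sketch of the combined semantics, using illustrative names rather than anything from the codebase:

package sketch

import "fmt"

// nodeClassRef mirrors the group/kind/name triple the CEL rules validate;
// this type is an illustration, not one from the Karpenter codebase.
type nodeClassRef struct {
	Group, Kind, Name string
}

// validate applies the same checks as the added x-kubernetes-validations:
// non-empty fields on every write, immutable group/kind on updates.
func validate(oldRef, newRef *nodeClassRef) error {
	if newRef.Group == "" || newRef.Kind == "" || newRef.Name == "" {
		return fmt.Errorf("group, kind, and name may not be empty")
	}
	if oldRef != nil { // corresponds to oldSelf in a CEL transition rule
		if newRef.Group != oldRef.Group {
			return fmt.Errorf("nodeClassRef.group is immutable")
		}
		if newRef.Kind != oldRef.Kind {
			return fmt.Errorf("nodeClassRef.kind is immutable")
		}
	}
	return nil
}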
diff --git a/go.mod b/go.mod
index 435ff9fab1d2..cc99778c0574 100644
--- a/go.mod
+++ b/go.mod
@@ -43,7 +43,7 @@ require (
 	k8s.io/klog/v2 v2.130.1
 	k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
 	sigs.k8s.io/controller-runtime v0.19.1
-	sigs.k8s.io/karpenter v1.0.1-0.20241121192054-9d472b7a5148
+	sigs.k8s.io/karpenter v1.0.1-0.20241124090654-63a72bf34b33
 	sigs.k8s.io/yaml v1.4.0
 )
diff --git a/go.sum b/go.sum
index 521ec6b861dd..7643289ca538 100644
--- a/go.sum
+++ b/go.sum
@@ -318,8 +318,8 @@ sigs.k8s.io/controller-runtime v0.19.1 h1:Son+Q40+Be3QWb+niBXAg2vFiYWolDjjRfO8hn
 sigs.k8s.io/controller-runtime v0.19.1/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
-sigs.k8s.io/karpenter v1.0.1-0.20241121192054-9d472b7a5148 h1:fka3VwD7fgslYjdc0Ab9EpE5dMjcxh9U4JmzS6sI/2A=
-sigs.k8s.io/karpenter v1.0.1-0.20241121192054-9d472b7a5148/go.mod h1:zolnK/3MxqSPEhEan2VBbzuGdReJPFTbpYWGivwTgic=
+sigs.k8s.io/karpenter v1.0.1-0.20241124090654-63a72bf34b33 h1:/u9GR6HPHuduxr3DT8FwsU5KxGt6Oe+RUa5wM+mPpz0=
+sigs.k8s.io/karpenter v1.0.1-0.20241124090654-63a72bf34b33/go.mod h1:zolnK/3MxqSPEhEan2VBbzuGdReJPFTbpYWGivwTgic=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
 sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
diff --git a/hack/validation/labels.sh b/hack/validation/labels.sh
index 3c837d3dcbb3..23aa02d7ee4c 100755
--- a/hack/validation/labels.sh
+++ b/hack/validation/labels.sh
@@ -1,7 +1,8 @@
-# Labels Validation
+# Labels Validation
-# # Adding validation for nodepool
-
-# ## checking for restricted labels while filtering out well known labels
-yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.metadata.properties.labels.x-kubernetes-validations += [
-  {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self.all(x, x in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\"))"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml
\ No newline at end of file
+function injectDomainLabelRestrictions() {
+  domain=$1
+  rule="self.all(x, x in [\"${domain}/ec2nodeclass\", \"${domain}/instance-encryption-in-transit-supported\", \"${domain}/instance-category\", \"${domain}/instance-hypervisor\", \"${domain}/instance-family\", \"${domain}/instance-generation\", \"${domain}/instance-local-nvme\", \"${domain}/instance-size\", \"${domain}/instance-cpu\", \"${domain}/instance-cpu-manufacturer\", \"${domain}/instance-cpu-sustained-clock-speed-mhz\", \"${domain}/instance-memory\", \"${domain}/instance-ebs-bandwidth\", \"${domain}/instance-network-bandwidth\", \"${domain}/instance-gpu-name\", \"${domain}/instance-gpu-manufacturer\", \"${domain}/instance-gpu-count\", \"${domain}/instance-gpu-memory\", \"${domain}/instance-accelerator-name\", \"${domain}/instance-accelerator-manufacturer\", \"${domain}/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"${domain}\"))"
+  message="label domain \"${domain}\" is restricted"
+  MSG="${message}" RULE="${rule}" yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.metadata.properties.labels.x-kubernetes-validations += [{"message": strenv(MSG), "rule": strenv(RULE)}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml
+}
diff --git a/hack/validation/requirements.sh b/hack/validation/requirements.sh
index d6b157fe2880..ccc70e2575fe 100755
--- a/hack/validation/requirements.sh
+++ b/hack/validation/requirements.sh
@@ -1,13 +1,9 @@
-# Requirements Validation
+# Requirements Validation
-# Adding validation for nodeclaim
-
-# v1
-## checking for restricted labels while filtering out well known labels
-yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [
-  {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml
-# # Adding validation for nodepool
-
-# ## checking for restricted labels while filtering out well known labels
-yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [
-  {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml
+function injectDomainRequirementRestrictions() {
+  domain=$1
+  rule="self in [\"${domain}/ec2nodeclass\", \"${domain}/instance-encryption-in-transit-supported\", \"${domain}/instance-category\", \"${domain}/instance-hypervisor\", \"${domain}/instance-family\", \"${domain}/instance-generation\", \"${domain}/instance-local-nvme\", \"${domain}/instance-size\", \"${domain}/instance-cpu\", \"${domain}/instance-cpu-manufacturer\", \"${domain}/instance-cpu-sustained-clock-speed-mhz\", \"${domain}/instance-memory\", \"${domain}/instance-ebs-bandwidth\", \"${domain}/instance-network-bandwidth\", \"${domain}/instance-gpu-name\", \"${domain}/instance-gpu-manufacturer\", \"${domain}/instance-gpu-count\", \"${domain}/instance-gpu-memory\", \"${domain}/instance-accelerator-name\", \"${domain}/instance-accelerator-manufacturer\", \"${domain}/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"${domain}\")"
+  message="label domain \"${domain}\" is restricted"
+  MSG="${message}" RULE="${rule}" yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [{"message": strenv(MSG), "rule": strenv(RULE)}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml
+  MSG="${message}" RULE="${rule}" yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [{"message": strenv(MSG), "rule": strenv(RULE)}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml
+}
diff --git a/pkg/apis/apis.go b/pkg/apis/apis.go
index 3e255ad9275d..dd995149b6ed 100644
--- a/pkg/apis/apis.go
+++ b/pkg/apis/apis.go
@@ -20,8 +20,6 @@ import (
 	"github.com/awslabs/operatorpkg/object"
 	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
-
-	"sigs.k8s.io/karpenter/pkg/apis"
 )

 //go:generate controller-gen crd object:headerFile="../../hack/boilerplate.go.txt" paths="./..." output:crd:artifacts:config=crds
@@ -30,7 +28,13 @@ var (
 	CompatibilityGroup = "compatibility." + Group
 	//go:embed crds/karpenter.k8s.aws_ec2nodeclasses.yaml
 	EC2NodeClassCRD []byte
-	CRDs = append(apis.CRDs,
+	//go:embed crds/karpenter.sh_nodepools.yaml
+	NodePoolCRD []byte
+	//go:embed crds/karpenter.sh_nodeclaims.yaml
+	NodeClaimCRD []byte
+	CRDs = []*apiextensionsv1.CustomResourceDefinition{
 		object.Unmarshal[apiextensionsv1.CustomResourceDefinition](EC2NodeClassCRD),
-	)
+		object.Unmarshal[apiextensionsv1.CustomResourceDefinition](NodeClaimCRD),
+		object.Unmarshal[apiextensionsv1.CustomResourceDefinition](NodePoolCRD),
+	}
 )
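With the sigs.k8s.io/karpenter/pkg/apis dependency removed, the provider now embeds and exports its own copies of the NodePool and NodeClaim CRDs (carrying the karpenter.k8s.aws/ec2nodeclass entries baked in below) instead of appending to the upstream list. A hedged sketch of how a consumer such as a test harness might install the exported set; the helper is hypothetical, not part of this patch:

package sketch

import (
	"context"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// applyCRDs creates each exported CRD, tolerating re-runs against a cluster
// that already has them installed.
func applyCRDs(ctx context.Context, kubeClient client.Client, crds []*apiextensionsv1.CustomResourceDefinition) error {
	for _, crd := range crds {
		// DeepCopy so the shared package-level objects are never mutated by the client.
		if err := kubeClient.Create(ctx, crd.DeepCopy()); err != nil && !apierrors.IsAlreadyExists(err) {
			return err
		}
	}
	return nil
}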
diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml
index e85d305d9aa7..bfe259dea177 100644
--- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml
+++ b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml
@@ -88,12 +88,21 @@ spec:
                 description: API version of the referent
                 pattern: ^[^/]*$
                 type: string
+                x-kubernetes-validations:
+                  - message: group may not be empty
+                    rule: self != ''
               kind:
                 description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"'
                 type: string
+                x-kubernetes-validations:
+                  - message: kind may not be empty
+                    rule: self != ''
               name:
                 description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names'
                 type: string
+                x-kubernetes-validations:
+                  - message: name may not be empty
+                    rule: self != ''
             required:
               - group
               - kind
@@ -121,7 +130,7 @@ spec:
                   - message: label "kubernetes.io/hostname" is restricted
                     rule: self != "kubernetes.io/hostname"
                   - message: label domain "karpenter.k8s.aws" is restricted
-                    rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
+                    rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
               minValues:
                 description: |-
                   This field is ALPHA and can be dropped or replaced at any time
diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml
index ffe4932aad54..d53ecbb85b56 100644
--- a/pkg/apis/crds/karpenter.sh_nodepools.yaml
+++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml
@@ -114,11 +114,9 @@ spec:
                     description: |-
                       Reasons is a list of disruption methods that this budget applies to. If Reasons is not set, this budget applies to all methods.
                       Otherwise, this will apply to each reason defined.
-                      allowed reasons are Underutilized, Empty, and Drifted and additional CloudProvider-specific reasons.
+                      allowed reasons are Underutilized, Empty, and Drifted.
                     items:
-                      description: |-
-                        DisruptionReason defines valid reasons for disruption budgets.
-                        CloudProviders will need to append to the list of enums when implementing cloud provider disruption reasons
+                      description: DisruptionReason defines valid reasons for disruption budgets.
                       enum:
                         - Underutilized
                         - Empty
@@ -209,7 +207,7 @@ spec:
                   - message: label "kubernetes.io/hostname" is restricted
                     rule: self.all(x, x != "kubernetes.io/hostname")
                   - message: label domain "karpenter.k8s.aws" is restricted
-                    rule: self.all(x, x in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws"))
+                    rule: self.all(x, x in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws"))
             type: object
           spec:
             description: |-
@@ -233,17 +231,31 @@ spec:
                     description: API version of the referent
                     pattern: ^[^/]*$
                     type: string
+                    x-kubernetes-validations:
+                      - message: group may not be empty
+                        rule: self != ''
                   kind:
                     description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"'
                     type: string
+                    x-kubernetes-validations:
+                      - message: kind may not be empty
+                        rule: self != ''
                   name:
                     description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names'
                     type: string
+                    x-kubernetes-validations:
+                      - message: name may not be empty
+                        rule: self != ''
                 required:
                   - group
                   - kind
                   - name
                 type: object
+                x-kubernetes-validations:
+                  - message: nodeClassRef.group is immutable
+                    rule: self.group == oldSelf.group
+                  - message: nodeClassRef.kind is immutable
+                    rule: self.kind == oldSelf.kind
              requirements:
                description: Requirements are layered with GetLabels and applied to every node.
                items:
@@ -268,7 +280,7 @@ spec:
                   - message: label "kubernetes.io/hostname" is restricted
                     rule: self != "kubernetes.io/hostname"
                   - message: label domain "karpenter.k8s.aws" is restricted
-                    rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
+                    rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
               minValues:
                 description: |-
                   This field is ALPHA and can be dropped or replaced at any time
diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go
index d16e75384981..26f54b69eef5 100644
--- a/pkg/cloudprovider/cloudprovider.go
+++ b/pkg/cloudprovider/cloudprovider.go
@@ -315,11 +315,20 @@ func (c *CloudProvider) resolveInstanceTypeFromInstance(ctx context.Context, ins
 }

 func (c *CloudProvider) resolveNodeClassFromInstance(ctx context.Context, instance *instance.Instance) (*v1.EC2NodeClass, error) {
-	np, err := c.resolveNodePoolFromInstance(ctx, instance)
-	if err != nil {
-		return nil, fmt.Errorf("resolving nodepool, %w", err)
+	name, ok := instance.Tags[v1.NodeClassTagKey]
+	if !ok {
+		return nil, errors.NewNotFound(schema.GroupResource{Group: apis.Group, Resource: "ec2nodeclasses"}, "")
 	}
-	return c.resolveNodeClassFromNodePool(ctx, np)
+	nc := &v1.EC2NodeClass{}
+	if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: name}, nc); err != nil {
+		return nil, fmt.Errorf("resolving ec2nodeclass, %w", err)
+	}
+	if !nc.DeletionTimestamp.IsZero() {
+		// For the purposes of NodeClass CloudProvider resolution, we treat deleting NodeClasses as NotFound,
+		// but we return a different error message to be clearer to users
+		return nil, newTerminatingNodeClassError(nc.Name)
+	}
+	return nc, nil
 }

 func (c *CloudProvider) resolveNodePoolFromInstance(ctx context.Context, instance *instance.Instance) (*karpv1.NodePool, error) {
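Resolution now keys off the instance's v1.NodeClassTagKey tag rather than walking instance to NodePool to NodeClass, so instances launched for NodeClasses this provider does not manage simply resolve to NotFound. A sketch of how a caller might branch on the outcome; it assumes newTerminatingNodeClassError is NotFound-compatible, which the comment in the hunk implies but the patch does not show:

package sketch

import (
	apierrors "k8s.io/apimachinery/pkg/api/errors"
)

// classifyResolution buckets the error returned by a tag-based NodeClass lookup.
func classifyResolution(err error) string {
	switch {
	case err == nil:
		return "active EC2NodeClass"
	case apierrors.IsNotFound(err):
		// Covers a missing karpenter.k8s.aws/ec2nodeclass tag, a deleted
		// EC2NodeClass, and (per the comment above) one that is mid-deletion.
		return "not managed by a live EC2NodeClass"
	default:
		return "transient resolution failure"
	}
}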
"sigs.k8s.io/karpenter/pkg/utils/pretty" "github.com/aws/karpenter-provider-aws/pkg/cache" @@ -60,6 +62,7 @@ const ( // trigger node health events or node spot interruption/rebalance events. type Controller struct { kubeClient client.Client + cloudProvider cloudprovider.CloudProvider clk clock.Clock recorder events.Recorder sqsProvider sqs.Provider @@ -68,11 +71,17 @@ type Controller struct { cm *pretty.ChangeMonitor } -func NewController(kubeClient client.Client, clk clock.Clock, recorder events.Recorder, - sqsProvider sqs.Provider, unavailableOfferingsCache *cache.UnavailableOfferings) *Controller { - +func NewController( + kubeClient client.Client, + cloudProvider cloudprovider.CloudProvider, + clk clock.Clock, + recorder events.Recorder, + sqsProvider sqs.Provider, + unavailableOfferingsCache *cache.UnavailableOfferings, +) *Controller { return &Controller{ kubeClient: kubeClient, + cloudProvider: cloudProvider, clk: clk, recorder: recorder, sqsProvider: sqsProvider, @@ -249,19 +258,19 @@ func (c *Controller) notifyForMessage(msg messages.Message, nodeClaim *karpv1.No // NodeClaim .status.providerID and the NodeClaim func (c *Controller) makeNodeClaimInstanceIDMap(ctx context.Context) (map[string]*karpv1.NodeClaim, error) { m := map[string]*karpv1.NodeClaim{} - nodeClaimList := &karpv1.NodeClaimList{} - if err := c.kubeClient.List(ctx, nodeClaimList); err != nil { + nodeClaims, err := nodeclaimutils.ListManaged(ctx, c.kubeClient, c.cloudProvider) + if err != nil { return nil, err } - for i := range nodeClaimList.Items { - if nodeClaimList.Items[i].Status.ProviderID == "" { + for _, nc := range nodeClaims { + if nc.Status.ProviderID == "" { continue } - id, err := utils.ParseInstanceID(nodeClaimList.Items[i].Status.ProviderID) + id, err := utils.ParseInstanceID(nc.Status.ProviderID) if err != nil || id == "" { continue } - m[id] = &nodeClaimList.Items[i] + m[id] = nc } return m, nil } diff --git a/pkg/controllers/interruption/suite_test.go b/pkg/controllers/interruption/suite_test.go index c867c7a9af2d..11aa11bf36ec 100644 --- a/pkg/controllers/interruption/suite_test.go +++ b/pkg/controllers/interruption/suite_test.go @@ -23,8 +23,6 @@ import ( "sigs.k8s.io/karpenter/pkg/metrics" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" - "github.com/aws/aws-sdk-go-v2/aws" servicesqs "github.com/aws/aws-sdk-go-v2/service/sqs" sqstypes "github.com/aws/aws-sdk-go-v2/service/sqs/types" @@ -44,6 +42,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/apis" awscache "github.com/aws/karpenter-provider-aws/pkg/cache" + "github.com/aws/karpenter-provider-aws/pkg/cloudprovider" "github.com/aws/karpenter-provider-aws/pkg/controllers/interruption" "github.com/aws/karpenter-provider-aws/pkg/controllers/interruption/messages" "github.com/aws/karpenter-provider-aws/pkg/controllers/interruption/messages/scheduledchange" @@ -51,11 +50,13 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/controllers/interruption/messages/statechange" "github.com/aws/karpenter-provider-aws/pkg/fake" "github.com/aws/karpenter-provider-aws/pkg/providers/sqs" + "github.com/aws/karpenter-provider-aws/pkg/test" "github.com/aws/karpenter-provider-aws/pkg/utils" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" . "sigs.k8s.io/karpenter/pkg/test/expectations" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" . 
"sigs.k8s.io/karpenter/pkg/utils/testing" ) @@ -66,6 +67,7 @@ const ( ) var ctx context.Context +var awsEnv *test.Environment var env *coretest.Environment var sqsapi *fake.SQSAPI var sqsProvider *sqs.DefaultProvider @@ -81,11 +83,14 @@ func TestAPIs(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) + awsEnv = test.NewEnvironment(ctx, env) fakeClock = &clock.FakeClock{} unavailableOfferingsCache = awscache.NewUnavailableOfferings() sqsapi = &fake.SQSAPI{} sqsProvider = lo.Must(sqs.NewDefaultProvider(sqsapi, fmt.Sprintf("https://sqs.%s.amazonaws.com/%s/test-cluster", fake.DefaultRegion, fake.DefaultAccount))) - controller = interruption.NewController(env.Client, fakeClock, events.NewRecorder(&record.FakeRecorder{}), sqsProvider, unavailableOfferingsCache) + cloudProvider := cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + controller = interruption.NewController(env.Client, cloudProvider, fakeClock, events.NewRecorder(&record.FakeRecorder{}), sqsProvider, unavailableOfferingsCache) }) var _ = AfterSuite(func() { diff --git a/pkg/controllers/nodeclaim/garbagecollection/controller.go b/pkg/controllers/nodeclaim/garbagecollection/controller.go index d228bf78e3d2..e1f92e2d6ad9 100644 --- a/pkg/controllers/nodeclaim/garbagecollection/controller.go +++ b/pkg/controllers/nodeclaim/garbagecollection/controller.go @@ -33,6 +33,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/operator/injection" + nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" ) @@ -54,32 +55,32 @@ func NewController(kubeClient client.Client, cloudProvider cloudprovider.CloudPr func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) { ctx = injection.WithControllerName(ctx, "instance.garbagecollection") - // We LIST machines on the CloudProvider BEFORE we grab Machines/Nodes on the cluster so that we make sure that, if - // LISTing instances takes a long time, our information is more updated by the time we get to Machine and Node LIST - // This works since our CloudProvider instances are deleted based on whether the Machine exists or not, not vise-versa - retrieved, err := c.cloudProvider.List(ctx) + // We LIST NodeClaims on the CloudProvider BEFORE we grab NodeClaims/Nodes on the cluster so that we make sure that, if + // LISTing cloudNodeClaims takes a long time, our information is more updated by the time we get to Node and NodeClaim LIST + // This works since our CloudProvider cloudNodeClaims are deleted based on whether the Machine exists or not, not vise-versa + cloudNodeClaims, err := c.cloudProvider.List(ctx) if err != nil { - return reconcile.Result{}, fmt.Errorf("listing cloudprovider machines, %w", err) + return reconcile.Result{}, fmt.Errorf("listing cloudprovider nodeclaims, %w", err) } - managedRetrieved := lo.Filter(retrieved, func(nc *karpv1.NodeClaim, _ int) bool { + // Filter out any cloudprovider NodeClaim which is already terminating + cloudNodeClaims = lo.Filter(cloudNodeClaims, func(nc *karpv1.NodeClaim, _ int) bool { return nc.DeletionTimestamp.IsZero() }) - nodeClaimList := &karpv1.NodeClaimList{} - if err = c.kubeClient.List(ctx, nodeClaimList); err != nil { + clusterNodeClaims, err := nodeclaimutils.ListManaged(ctx, c.kubeClient, 
c.cloudProvider) + if err != nil { return reconcile.Result{}, err } + clusterProviderIDs := sets.New(lo.FilterMap(clusterNodeClaims, func(nc *karpv1.NodeClaim, _ int) (string, bool) { + return nc.Status.ProviderID, nc.Status.ProviderID != "" + })...) nodeList := &corev1.NodeList{} if err = c.kubeClient.List(ctx, nodeList); err != nil { return reconcile.Result{}, err } - resolvedProviderIDs := sets.New[string](lo.FilterMap(nodeClaimList.Items, func(n karpv1.NodeClaim, _ int) (string, bool) { - return n.Status.ProviderID, n.Status.ProviderID != "" - })...) - errs := make([]error, len(retrieved)) - workqueue.ParallelizeUntil(ctx, 100, len(managedRetrieved), func(i int) { - if !resolvedProviderIDs.Has(managedRetrieved[i].Status.ProviderID) && - time.Since(managedRetrieved[i].CreationTimestamp.Time) > time.Second*30 { - errs[i] = c.garbageCollect(ctx, managedRetrieved[i], nodeList) + errs := make([]error, len(cloudNodeClaims)) + workqueue.ParallelizeUntil(ctx, 100, len(cloudNodeClaims), func(i int) { + if nc := cloudNodeClaims[i]; !clusterProviderIDs.Has(nc.Status.ProviderID) && time.Since(nc.CreationTimestamp.Time) > time.Second*30 { + errs[i] = c.garbageCollect(ctx, cloudNodeClaims[i], nodeList) } }) if err = multierr.Combine(errs...); err != nil { diff --git a/pkg/controllers/nodeclass/hash/controller.go b/pkg/controllers/nodeclass/hash/controller.go index b3a87cce597c..38f4ca695b6c 100644 --- a/pkg/controllers/nodeclass/hash/controller.go +++ b/pkg/controllers/nodeclass/hash/controller.go @@ -26,6 +26,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/karpenter/pkg/operator/injection" + nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim" "github.com/awslabs/operatorpkg/reasonable" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" @@ -83,14 +84,14 @@ func (c *Controller) Register(_ context.Context, m manager.Manager) error { // EC2NodeClass. Since, we cannot rely on the `ec2nodeclass-hash` on the NodeClaims, due to the breaking change, we will need to re-calculate the hash and update the annotation. 
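The rewritten loop is a set difference with a 30-second grace period: anything the CloudProvider reports that has no matching providerID among the cluster's managed NodeClaims is a leak candidate. Distilled into a standalone helper (names are illustrative, not the controller's own):

package sketch

import (
	"time"

	"github.com/samber/lo"
	"k8s.io/apimachinery/pkg/util/sets"
	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
)

// findLeaked returns cloudprovider NodeClaims unknown to the cluster that are
// old enough to have registered; the 30s window avoids racing fresh launches.
func findLeaked(cloud []*karpv1.NodeClaim, clusterProviderIDs sets.Set[string], now time.Time) []*karpv1.NodeClaim {
	return lo.Filter(cloud, func(nc *karpv1.NodeClaim, _ int) bool {
		return !clusterProviderIDs.Has(nc.Status.ProviderID) &&
			now.Sub(nc.CreationTimestamp.Time) > 30*time.Second
	})
}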
diff --git a/pkg/controllers/nodeclass/hash/controller.go b/pkg/controllers/nodeclass/hash/controller.go
index b3a87cce597c..38f4ca695b6c 100644
--- a/pkg/controllers/nodeclass/hash/controller.go
+++ b/pkg/controllers/nodeclass/hash/controller.go
@@ -26,6 +26,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/manager"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 	"sigs.k8s.io/karpenter/pkg/operator/injection"
+	nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim"

 	"github.com/awslabs/operatorpkg/reasonable"
 	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
@@ -83,14 +84,14 @@ func (c *Controller) Register(_ context.Context, m manager.Manager) error {
 // EC2NodeClass. Since, we cannot rely on the `ec2nodeclass-hash` on the NodeClaims, due to the breaking change, we will need to re-calculate the hash and update the annotation.
 // For more information on the Drift Hash Versioning: https://github.com/kubernetes-sigs/karpenter/blob/main/designs/drift-hash-versioning.md
 func (c *Controller) updateNodeClaimHash(ctx context.Context, nodeClass *v1.EC2NodeClass) error {
-	ncList := &karpv1.NodeClaimList{}
-	if err := c.kubeClient.List(ctx, ncList, client.MatchingFields{"spec.nodeClassRef.name": nodeClass.Name}); err != nil {
+	nodeClaims := &karpv1.NodeClaimList{}
+	if err := c.kubeClient.List(ctx, nodeClaims, nodeclaimutils.ForNodeClass(nodeClass)); err != nil {
 		return err
 	}

-	errs := make([]error, len(ncList.Items))
-	for i := range ncList.Items {
-		nc := ncList.Items[i]
+	errs := make([]error, len(nodeClaims.Items))
+	for i := range nodeClaims.Items {
+		nc := &nodeClaims.Items[i]
 		stored := nc.DeepCopy()

 		if nc.Annotations[v1.AnnotationEC2NodeClassHashVersion] != v1.EC2NodeClassHashVersion {
@@ -107,7 +108,7 @@ func (c *Controller) updateNodeClaimHash(ctx context.Context, nodeClass *v1.EC2N
 		}

 		if !equality.Semantic.DeepEqual(stored, nc) {
-			if err := c.kubeClient.Patch(ctx, &nc, client.MergeFrom(stored)); err != nil {
+			if err := c.kubeClient.Patch(ctx, nc, client.MergeFrom(stored)); err != nil {
 				errs[i] = client.IgnoreNotFound(err)
 			}
 		}
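Both this controller and the termination controller below now scope their NodeClaim lists with nodeclaimutils.ForNodeClass instead of a hand-rolled MatchingFields on name alone, so only NodeClaims whose nodeClassRef actually points at this NodeClass (by group and kind, not just name) are touched. The patch does not show the helper's body; a sketch of what it presumably expands to, with the index keys and values treated as assumptions:

package sketch

import (
	"sigs.k8s.io/controller-runtime/pkg/client"

	v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
)

// forNodeClass builds a field-selector list option over the nodeClassRef index;
// the key names and literal group/kind below are illustrative assumptions.
func forNodeClass(nodeClass *v1.EC2NodeClass) client.ListOption {
	return client.MatchingFields{
		"spec.nodeClassRef.group": "karpenter.k8s.aws", // assumed index key/value
		"spec.nodeClassRef.kind":  "EC2NodeClass",      // assumed
		"spec.nodeClassRef.name":  nodeClass.Name,
	}
}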
diff --git a/pkg/controllers/nodeclass/hash/suite_test.go b/pkg/controllers/nodeclass/hash/suite_test.go
index 10b781975385..4d21e0a6f8e3 100644
--- a/pkg/controllers/nodeclass/hash/suite_test.go
+++ b/pkg/controllers/nodeclass/hash/suite_test.go
@@ -55,7 +55,7 @@ func TestAPIs(t *testing.T) {
 }

 var _ = BeforeSuite(func() {
-	env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(test.EC2NodeClassFieldIndexer(ctx)))
+	env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeClaimNodeClassRefFieldIndexer(ctx)))
 	ctx = coreoptions.ToContext(ctx, coretest.Options())
 	ctx = options.ToContext(ctx, test.Options())
 	awsEnv = test.NewEnvironment(ctx, env)
diff --git a/pkg/controllers/nodeclass/status/suite_test.go b/pkg/controllers/nodeclass/status/suite_test.go
index 3eb2b1152ec5..afb51f1f069a 100644
--- a/pkg/controllers/nodeclass/status/suite_test.go
+++ b/pkg/controllers/nodeclass/status/suite_test.go
@@ -48,7 +48,7 @@ func TestAPIs(t *testing.T) {
 }

 var _ = BeforeSuite(func() {
-	env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(test.EC2NodeClassFieldIndexer(ctx)))
+	env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeClaimNodeClassRefFieldIndexer(ctx)))
 	ctx = coreoptions.ToContext(ctx, coretest.Options())
 	ctx = options.ToContext(ctx, test.Options())
 	awsEnv = test.NewEnvironment(ctx, env)
diff --git a/pkg/controllers/nodeclass/termination/controller.go b/pkg/controllers/nodeclass/termination/controller.go
index 0a3b8cb622db..b53eadf62b0d 100644
--- a/pkg/controllers/nodeclass/termination/controller.go
+++ b/pkg/controllers/nodeclass/termination/controller.go
@@ -21,6 +21,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/errors"
 	"sigs.k8s.io/karpenter/pkg/operator/injection"
+	nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim"

 	"github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate"

@@ -78,12 +79,12 @@ func (c *Controller) finalize(ctx context.Context, nodeClass *v1.EC2NodeClass) (
 	if !controllerutil.ContainsFinalizer(nodeClass, v1.TerminationFinalizer) {
 		return reconcile.Result{}, nil
 	}
-	nodeClaimList := &karpv1.NodeClaimList{}
-	if err := c.kubeClient.List(ctx, nodeClaimList, client.MatchingFields{"spec.nodeClassRef.name": nodeClass.Name}); err != nil {
+	nodeClaims := &karpv1.NodeClaimList{}
+	if err := c.kubeClient.List(ctx, nodeClaims, nodeclaimutils.ForNodeClass(nodeClass)); err != nil {
 		return reconcile.Result{}, fmt.Errorf("listing nodeclaims that are using nodeclass, %w", err)
 	}
-	if len(nodeClaimList.Items) > 0 {
-		c.recorder.Publish(WaitingOnNodeClaimTerminationEvent(nodeClass, lo.Map(nodeClaimList.Items, func(nc karpv1.NodeClaim, _ int) string { return nc.Name })))
+	if len(nodeClaims.Items) > 0 {
+		c.recorder.Publish(WaitingOnNodeClaimTerminationEvent(nodeClass, lo.Map(nodeClaims.Items, func(nc karpv1.NodeClaim, _ int) string { return nc.Name })))
 		return reconcile.Result{RequeueAfter: time.Minute * 10}, nil // periodically fire the event
 	}
 	if nodeClass.Spec.Role != "" {
diff --git a/pkg/controllers/nodeclass/termination/suite_test.go b/pkg/controllers/nodeclass/termination/suite_test.go
index 80e2ecaea1a6..2ea995d2f1e2 100644
--- a/pkg/controllers/nodeclass/termination/suite_test.go
+++ b/pkg/controllers/nodeclass/termination/suite_test.go
@@ -60,7 +60,7 @@ func TestAPIs(t *testing.T) {
 }

 var _ = BeforeSuite(func() {
-	env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(test.EC2NodeClassFieldIndexer(ctx)))
+	env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeClaimNodeClassRefFieldIndexer(ctx)))
 	ctx = coreoptions.ToContext(ctx, coretest.Options())
 	ctx = options.ToContext(ctx, test.Options())
 	awsEnv = test.NewEnvironment(ctx, env)
diff --git a/pkg/controllers/providers/instancetype/capacity/controller.go b/pkg/controllers/providers/instancetype/capacity/controller.go
index 37997de1cea2..c3b189e44cdb 100644
--- a/pkg/controllers/providers/instancetype/capacity/controller.go
+++ b/pkg/controllers/providers/instancetype/capacity/controller.go
@@ -29,6 +29,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/predicate"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
+	"sigs.k8s.io/karpenter/pkg/cloudprovider"
 	"sigs.k8s.io/karpenter/pkg/operator/injection"
 	nodeutils "sigs.k8s.io/karpenter/pkg/utils/node"

@@ -38,18 +39,23 @@ import (
 type Controller struct {
 	kubeClient           client.Client
+	cloudProvider        cloudprovider.CloudProvider
 	instancetypeProvider *instancetype.DefaultProvider
 }

-func NewController(kubeClient client.Client, instancetypeProvider *instancetype.DefaultProvider) *Controller {
+func NewController(kubeClient client.Client, cloudProvider cloudprovider.CloudProvider, instancetypeProvider *instancetype.DefaultProvider) *Controller {
 	return &Controller{
 		kubeClient:           kubeClient,
+		cloudProvider:        cloudProvider,
 		instancetypeProvider: instancetypeProvider,
 	}
 }

 func (c *Controller) Reconcile(ctx context.Context, node *corev1.Node) (reconcile.Result, error) {
 	ctx = injection.WithControllerName(ctx, "providers.instancetype.capacity")
+	if !nodeutils.IsManaged(node, c.cloudProvider) {
+		return reconcile.Result{}, nil
+	}
 	nodeClaim, err := nodeutils.NodeClaimForNode(ctx, c.kubeClient, node)
 	if err != nil {
 		return reconcile.Result{}, fmt.Errorf("failed to get nodeclaim for node, %w", err)
@@ -82,7 +88,7 @@ func (c *Controller) Register(_ context.Context, m manager.Manager) error {
 			},
 			DeleteFunc:  func(e event.TypedDeleteEvent[client.Object]) bool { return false },
 			GenericFunc: func(e event.TypedGenericEvent[client.Object]) bool { return false },
-		})).
+		}, nodeutils.IsManagedPredicateFuncs(c.cloudProvider))).
 		WithOptions(controller.Options{
 			RateLimiter:             reasonable.RateLimiter(),
 			MaxConcurrentReconciles: 1,
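The capacity controller now bails out of Reconcile for unmanaged nodes and filters its watch with nodeutils.IsManagedPredicateFuncs, so events for nodes this provider does not own never enter the workqueue. The updated test below stamps nodes with a karpenter.k8s.aws/ec2nodeclass label, which suggests managedness can be read off labels; a controller-runtime predicate sketch under that assumption:

package sketch

import (
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/predicate"
)

// isManagedPredicate drops events for objects without the (assumed)
// karpenter.k8s.aws/ec2nodeclass label before they reach Reconcile.
func isManagedPredicate() predicate.Funcs {
	return predicate.NewPredicateFuncs(func(o client.Object) bool {
		_, ok := o.GetLabels()["karpenter.k8s.aws/ec2nodeclass"]
		return ok
	})
}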
diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go
index 7594af931c2e..1b6ec4604571 100644
--- a/pkg/providers/instancetype/instancetype.go
+++ b/pkg/providers/instancetype/instancetype.go
@@ -271,7 +271,6 @@ func (p *DefaultProvider) UpdateInstanceTypeOfferings(ctx context.Context) error
 }
 
 func (p *DefaultProvider) UpdateInstanceTypeCapacityFromNode(ctx context.Context, node *corev1.Node, nodeClaim *karpv1.NodeClaim, nodeClass *v1.EC2NodeClass) error {
-	// Get mappings for most recent AMIs
 	instanceTypeName := node.Labels[corev1.LabelInstanceTypeStable]
 	amiMap := amifamily.MapToInstanceTypes([]*cloudprovider.InstanceType{{
diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go
index 2b2bc4c0b892..6505d0c6ec3f 100644
--- a/pkg/providers/instancetype/suite_test.go
+++ b/pkg/providers/instancetype/suite_test.go
@@ -31,6 +31,7 @@ import (
 	"github.com/aws/aws-sdk-go-v2/aws"
 	"github.com/aws/aws-sdk-go-v2/service/ec2"
 	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
+	"github.com/awslabs/operatorpkg/object"
 	"github.com/awslabs/operatorpkg/status"
 	"github.com/imdario/mergo"
 	. "github.com/onsi/ginkgo/v2"
@@ -88,7 +89,7 @@ var _ = BeforeSuite(func() {
 	fakeClock = &clock.FakeClock{}
 	cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}),
 		env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider)
-	cluster = state.NewCluster(fakeClock, env.Client)
+	cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
 	prov = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster, fakeClock)
 })
 
@@ -160,7 +161,9 @@ var _ = Describe("InstanceTypeProvider", func() {
 					},
 				},
 				NodeClassRef: &karpv1.NodeClassReference{
-					Name: nodeClass.Name,
+					Group: object.GVK(nodeClass).Group,
+					Kind:  object.GVK(nodeClass).Kind,
+					Name:  nodeClass.Name,
 				},
 			},
 		},
@@ -203,7 +206,9 @@ var _ = Describe("InstanceTypeProvider", func() {
 					},
 				},
 				NodeClassRef: &karpv1.NodeClassReference{
-					Name: windowsNodeClass.Name,
+					Group: object.GVK(windowsNodeClass).Group,
+					Kind:  object.GVK(windowsNodeClass).Kind,
+					Name:  windowsNodeClass.Name,
 				},
 			},
 		},
diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go
index ddaa228e3b2e..5c882274b328 100644
--- a/pkg/providers/launchtemplate/suite_test.go
+++ b/pkg/providers/launchtemplate/suite_test.go
@@ -98,7 +98,7 @@ var _ = BeforeSuite(func() {
 	fakeClock = &clock.FakeClock{}
 	cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}),
 		env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider)
-	cluster = state.NewCluster(fakeClock, env.Client)
+	cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
 	prov = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster, fakeClock)
 })
 
@@ -1441,7 +1441,7 @@ essential = true
 			ExpectScheduled(ctx, env.Client, pod)
 			content, err = os.ReadFile("testdata/br_userdata_merged.golden")
 			Expect(err).To(BeNil())
-			ExpectLaunchTemplatesCreatedWithUserData(fmt.Sprintf(string(content), karpv1.NodePoolLabelKey, nodePool.Name))
+			ExpectLaunchTemplatesCreatedWithUserData(fmt.Sprintf(string(content), nodeClass.Name, karpv1.NodePoolLabelKey, nodePool.Name))
 		})
 		It("should bootstrap when custom user data is empty", func() {
 			nodePool.Spec.Template.Spec.Taints = []corev1.Taint{{Key: "foo", Value: "bar", Effect: corev1.TaintEffectNoExecute}}
@@ -1455,7 +1455,7 @@ essential = true
 			ExpectScheduled(ctx, env.Client, pod)
 			content, err := os.ReadFile("testdata/br_userdata_unmerged.golden")
 			Expect(err).To(BeNil())
-			ExpectLaunchTemplatesCreatedWithUserData(fmt.Sprintf(string(content), karpv1.NodePoolLabelKey, nodePool.Name))
+			ExpectLaunchTemplatesCreatedWithUserData(fmt.Sprintf(string(content), nodeClass.Name, karpv1.NodePoolLabelKey, nodePool.Name))
 		})
 		It("should not bootstrap when provider ref points to a non-existent EC2NodeClass resource", func() {
 			nodePool.Spec.Template.Spec.NodeClassRef = &karpv1.NodeClassReference{
@@ -1655,7 +1655,7 @@ essential = true
 			ExpectScheduled(ctx, env.Client, pod)
 			content, err = os.ReadFile("testdata/al2_userdata_merged.golden")
 			Expect(err).To(BeNil())
-			expectedUserData := fmt.Sprintf(string(content), karpv1.NodePoolLabelKey, nodePool.Name)
+			expectedUserData := fmt.Sprintf(string(content), nodeClass.Name, karpv1.NodePoolLabelKey, nodePool.Name)
 			ExpectLaunchTemplatesCreatedWithUserData(expectedUserData)
 		})
 		It("should merge in custom user data when Content-Type is before MIME-Version", func() {
@@ -1668,7 +1668,7 @@ essential = true
 			ExpectScheduled(ctx, env.Client, pod)
 			content, err = os.ReadFile("testdata/al2_userdata_merged.golden")
 			Expect(err).To(BeNil())
-			expectedUserData := fmt.Sprintf(string(content), karpv1.NodePoolLabelKey, nodePool.Name)
+			expectedUserData := fmt.Sprintf(string(content), nodeClass.Name, karpv1.NodePoolLabelKey, nodePool.Name)
 			ExpectLaunchTemplatesCreatedWithUserData(expectedUserData)
 		})
 		It("should merge in custom user data not in multi-part mime format", func() {
@@ -1681,7 +1681,7 @@ essential = true
 			ExpectScheduled(ctx, env.Client, pod)
 			content, err = os.ReadFile("testdata/al2_userdata_merged.golden")
 			Expect(err).To(BeNil())
-			expectedUserData := fmt.Sprintf(string(content), karpv1.NodePoolLabelKey, nodePool.Name)
+			expectedUserData := fmt.Sprintf(string(content), nodeClass.Name, karpv1.NodePoolLabelKey, nodePool.Name)
 			ExpectLaunchTemplatesCreatedWithUserData(expectedUserData)
 		})
 		It("should handle empty custom user data", func() {
@@ -1692,7 +1692,7 @@ essential = true
 			ExpectScheduled(ctx, env.Client, pod)
 			content, err := os.ReadFile("testdata/al2_userdata_unmerged.golden")
 			Expect(err).To(BeNil())
-			expectedUserData := fmt.Sprintf(string(content), karpv1.NodePoolLabelKey, nodePool.Name)
+			expectedUserData := fmt.Sprintf(string(content), nodeClass.Name, karpv1.NodePoolLabelKey, nodePool.Name)
 			ExpectLaunchTemplatesCreatedWithUserData(expectedUserData)
 		})
 	})
@@ -1880,7 +1880,7 @@ essential = true
 			ExpectScheduled(ctx, env.Client, pod)
 			content, err := os.ReadFile("testdata/" + mergedFile)
 			Expect(err).To(BeNil())
-			expectedUserData := fmt.Sprintf(string(content), karpv1.NodePoolLabelKey, nodePool.Name)
+			expectedUserData := fmt.Sprintf(string(content), nodeClass.Name, karpv1.NodePoolLabelKey, nodePool.Name)
 			ExpectLaunchTemplatesCreatedWithUserData(expectedUserData)
 		},
 			Entry("MIME", lo.ToPtr("al2023_mime_userdata_input.golden"), "al2023_mime_userdata_merged.golden"),
@@ -2153,7 +2153,7 @@ essential = true
 			ExpectScheduled(ctx, env.Client, pod)
 			content, err = os.ReadFile("testdata/windows_userdata_merged.golden")
 			Expect(err).To(BeNil())
-			ExpectLaunchTemplatesCreatedWithUserData(fmt.Sprintf(string(content), karpv1.NodePoolLabelKey, nodePool.Name))
+			ExpectLaunchTemplatesCreatedWithUserData(fmt.Sprintf(string(content), nodeClass.Name, karpv1.NodePoolLabelKey, nodePool.Name))
 		})
 		It("should bootstrap when custom user data is empty", func() {
 			ExpectApplied(ctx, env.Client, nodeClass, nodePool)
@@ -2168,7 +2168,7 @@ essential = true
 			ExpectScheduled(ctx, env.Client, pod)
 			content, err := os.ReadFile("testdata/windows_userdata_unmerged.golden")
 			Expect(err).To(BeNil())
-			ExpectLaunchTemplatesCreatedWithUserData(fmt.Sprintf(string(content), karpv1.NodePoolLabelKey, nodePool.Name))
+			ExpectLaunchTemplatesCreatedWithUserData(fmt.Sprintf(string(content), nodeClass.Name, karpv1.NodePoolLabelKey, nodePool.Name))
 		})
 	})
 })
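Every golden user-data file below gains a leading karpenter.k8s.aws/ec2nodeclass=%s label, which is why each fmt.Sprintf call site above now passes nodeClass.Name as the first argument: fmt fills verbs strictly left to right, and a missing argument would surface as %!s(MISSING) in the rendered user data rather than as an error. A self-contained illustration with made-up values:

package main

import "fmt"

func main() {
	// Mirrors the golden-file templates: the node class name fills the first
	// %s, then the nodepool label key and value fill the remaining verbs.
	tmpl := `--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s"`
	fmt.Println(fmt.Sprintf(tmpl, "default", "karpenter.sh/nodepool", "my-pool"))
	// --node-labels="karpenter.k8s.aws/ec2nodeclass=default,karpenter.sh/capacity-type=on-demand,karpenter.sh/nodepool=my-pool"
}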
diff --git a/pkg/providers/launchtemplate/testdata/al2023_mime_userdata_merged.golden b/pkg/providers/launchtemplate/testdata/al2023_mime_userdata_merged.golden
index 335f650b12ab..f293575ffe5f 100644
--- a/pkg/providers/launchtemplate/testdata/al2023_mime_userdata_merged.golden
+++ b/pkg/providers/launchtemplate/testdata/al2023_mime_userdata_merged.golden
@@ -46,6 +46,6 @@ spec:
       - effect: NoExecute
         key: karpenter.sh/unregistered
     flags:
-    - --node-labels="karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
+    - --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
 
 --//--
diff --git a/pkg/providers/launchtemplate/testdata/al2023_shell_userdata_merged.golden b/pkg/providers/launchtemplate/testdata/al2023_shell_userdata_merged.golden
index 2b2fdd8c3c54..b4b1faa5a797 100644
--- a/pkg/providers/launchtemplate/testdata/al2023_shell_userdata_merged.golden
+++ b/pkg/providers/launchtemplate/testdata/al2023_shell_userdata_merged.golden
@@ -32,6 +32,6 @@ spec:
       - effect: NoExecute
         key: karpenter.sh/unregistered
     flags:
-    - --node-labels="karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
+    - --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
 
 --//--
diff --git a/pkg/providers/launchtemplate/testdata/al2023_userdata_unmerged.golden b/pkg/providers/launchtemplate/testdata/al2023_userdata_unmerged.golden
index cf18ed3508f4..13f22ded4f36 100644
--- a/pkg/providers/launchtemplate/testdata/al2023_userdata_unmerged.golden
+++ b/pkg/providers/launchtemplate/testdata/al2023_userdata_unmerged.golden
@@ -27,6 +27,6 @@ spec:
       - effect: NoExecute
         key: karpenter.sh/unregistered
     flags:
-    - --node-labels="karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
+    - --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
 
 --//--
diff --git a/pkg/providers/launchtemplate/testdata/al2023_yaml_userdata_merged.golden b/pkg/providers/launchtemplate/testdata/al2023_yaml_userdata_merged.golden
index d54c3b4ece9e..dcae03bb27aa 100644
--- a/pkg/providers/launchtemplate/testdata/al2023_yaml_userdata_merged.golden
+++ b/pkg/providers/launchtemplate/testdata/al2023_yaml_userdata_merged.golden
@@ -41,6 +41,6 @@ spec:
       - effect: NoExecute
         key: karpenter.sh/unregistered
     flags:
-    - --node-labels="karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
+    - --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
 
 --//--
diff --git a/pkg/providers/launchtemplate/testdata/al2_userdata_merged.golden b/pkg/providers/launchtemplate/testdata/al2_userdata_merged.golden
index 22c8a37dfc03..bf93f8a67b30 100644
--- a/pkg/providers/launchtemplate/testdata/al2_userdata_merged.golden
+++ b/pkg/providers/launchtemplate/testdata/al2_userdata_merged.golden
@@ -15,5 +15,5 @@ exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
 /etc/eks/bootstrap.sh 'test-cluster' --apiserver-endpoint 'https://test-cluster' --b64-cluster-ca 'ca-bundle' \
 --dns-cluster-ip '10.0.100.10' \
 --use-max-pods false \
---kubelet-extra-args '--node-labels="karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110'
+--kubelet-extra-args '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110'
 --//--
diff --git a/pkg/providers/launchtemplate/testdata/al2_userdata_unmerged.golden b/pkg/providers/launchtemplate/testdata/al2_userdata_unmerged.golden
index 39ecb867db43..bbf98e8d7716 100644
--- a/pkg/providers/launchtemplate/testdata/al2_userdata_unmerged.golden
+++ b/pkg/providers/launchtemplate/testdata/al2_userdata_unmerged.golden
@@ -9,5 +9,5 @@ exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
 /etc/eks/bootstrap.sh 'test-cluster' --apiserver-endpoint 'https://test-cluster' --b64-cluster-ca 'ca-bundle' \
 --dns-cluster-ip '10.0.100.10' \
 --use-max-pods false \
---kubelet-extra-args '--node-labels="karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110'
+--kubelet-extra-args '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110'
 --//--
diff --git a/pkg/providers/launchtemplate/testdata/br_userdata_merged.golden b/pkg/providers/launchtemplate/testdata/br_userdata_merged.golden
index a9a425e55aba..47f942f7564f 100644
--- a/pkg/providers/launchtemplate/testdata/br_userdata_merged.golden
+++ b/pkg/providers/launchtemplate/testdata/br_userdata_merged.golden
@@ -9,6 +9,7 @@ max-pods = 110
 
 [settings.kubernetes.node-labels]
 custom-node-label = 'custom'
+'karpenter.k8s.aws/ec2nodeclass' = '%s'
 'karpenter.sh/capacity-type' = 'on-demand'
 '%s' = '%s'
 'testing/cluster' = 'unspecified'
diff --git a/pkg/providers/launchtemplate/testdata/br_userdata_unmerged.golden b/pkg/providers/launchtemplate/testdata/br_userdata_unmerged.golden
index b81fe613e8e0..e163f9299f94 100644
--- a/pkg/providers/launchtemplate/testdata/br_userdata_unmerged.golden
+++ b/pkg/providers/launchtemplate/testdata/br_userdata_unmerged.golden
@@ -7,6 +7,7 @@ cluster-dns-ip = '10.0.100.10'
 max-pods = 110
 
 [settings.kubernetes.node-labels]
+'karpenter.k8s.aws/ec2nodeclass' = '%s'
 'karpenter.sh/capacity-type' = 'on-demand'
 '%s' = '%s'
 'testing/cluster' = 'unspecified'
diff --git a/pkg/providers/launchtemplate/testdata/windows_userdata_merged.golden b/pkg/providers/launchtemplate/testdata/windows_userdata_merged.golden
index d4c7b6285210..3cef64c52c1a 100644
--- a/pkg/providers/launchtemplate/testdata/windows_userdata_merged.golden
+++ b/pkg/providers/launchtemplate/testdata/windows_userdata_merged.golden
@@ -2,5 +2,5 @@ Write-Host "Running custom user data script"
 Write-Host "Finished running custom user data script"
 [string]$EKSBootstrapScriptFile = "$env:ProgramFiles\Amazon\EKS\Start-EKSBootstrap.ps1"
-& $EKSBootstrapScriptFile -EKSClusterName 'test-cluster' -APIServerEndpoint 'https://test-cluster' -Base64ClusterCA 'ca-bundle' -KubeletExtraArgs '--node-labels="karpenter.sh/capacity-type=spot,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110' -DNSClusterIP '10.0.100.10'
+& $EKSBootstrapScriptFile -EKSClusterName 'test-cluster' -APIServerEndpoint 'https://test-cluster' -Base64ClusterCA 'ca-bundle' -KubeletExtraArgs '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=spot,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110' -DNSClusterIP '10.0.100.10'
diff --git a/pkg/providers/launchtemplate/testdata/windows_userdata_unmerged.golden b/pkg/providers/launchtemplate/testdata/windows_userdata_unmerged.golden
index a0b309c57f9c..03db7bb4d543 100644
--- a/pkg/providers/launchtemplate/testdata/windows_userdata_unmerged.golden
+++ b/pkg/providers/launchtemplate/testdata/windows_userdata_unmerged.golden
@@ -1,4 +1,4 @@
 [string]$EKSBootstrapScriptFile = "$env:ProgramFiles\Amazon\EKS\Start-EKSBootstrap.ps1"
-& $EKSBootstrapScriptFile -EKSClusterName 'test-cluster' -APIServerEndpoint 'https://test-cluster' -Base64ClusterCA 'ca-bundle' -KubeletExtraArgs '--node-labels="karpenter.sh/capacity-type=spot,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110' -DNSClusterIP '10.0.100.10'
+& $EKSBootstrapScriptFile -EKSClusterName 'test-cluster' -APIServerEndpoint 'https://test-cluster' -Base64ClusterCA 'ca-bundle' -KubeletExtraArgs '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=spot,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110' -DNSClusterIP '10.0.100.10'
diff --git a/pkg/test/environment.go b/pkg/test/environment.go
index 50b04f5c1a59..89e5760f48f5 100644
--- a/pkg/test/environment.go
+++ b/pkg/test/environment.go
@@ -26,6 +26,7 @@ import (
 	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
 
+	v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
 	awscache "github.com/aws/karpenter-provider-aws/pkg/cache"
 	"github.com/aws/karpenter-provider-aws/pkg/fake"
 	"github.com/aws/karpenter-provider-aws/pkg/providers/amifamily"
@@ -46,6 +47,7 @@ import (
 
 func init() {
 	karpv1.NormalizedLabels = lo.Assign(karpv1.NormalizedLabels, map[string]string{"topology.ebs.csi.aws.com/zone": corev1.LabelTopologyZone})
+	coretest.SetDefaultNodeClassType(&v1.EC2NodeClass{})
 }
 
 type Environment struct {
diff --git a/pkg/test/nodeclass.go b/pkg/test/nodeclass.go
index f04d80817bf2..71f5e39b5ec1 100644
--- a/pkg/test/nodeclass.go
+++ b/pkg/test/nodeclass.go
@@ -15,13 +15,10 @@ limitations under the License.
 package test
 
 import (
-	"context"
 	"fmt"
 
 	"github.com/imdario/mergo"
 	corev1 "k8s.io/api/core/v1"
-	"sigs.k8s.io/controller-runtime/pkg/cache"
-	"sigs.k8s.io/controller-runtime/pkg/client"
 	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
 	"sigs.k8s.io/karpenter/pkg/test"
@@ -128,18 +125,6 @@ func EC2NodeClass(overrides ...v1.EC2NodeClass) *v1.EC2NodeClass {
 	}
 }
 
-func EC2NodeClassFieldIndexer(ctx context.Context) func(cache.Cache) error {
-	return func(c cache.Cache) error {
-		return c.IndexField(ctx, &karpv1.NodeClaim{}, "spec.nodeClassRef.name", func(obj client.Object) []string {
-			nc := obj.(*karpv1.NodeClaim)
-			if nc.Spec.NodeClassRef == nil {
-				return []string{""}
-			}
-			return []string{nc.Spec.NodeClassRef.Name}
-		})
-	}
-}
-
 type TestNodeClass struct {
 	v1.EC2NodeClass
 }
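Two test-infrastructure changes round out the patch: coretest.SetDefaultNodeClassType registers EC2NodeClass as the default node class type for generated test objects, and the name-based EC2NodeClassFieldIndexer is deleted in favor of the upstream provider-ID indexer used in the suites above. Below is a sketch of what such an indexer looks like, modeled directly on the deleted code; the "status.providerID" index key is an assumption, and the shipped helper is coretest.NodeClaimProviderIDFieldIndexer.

// Sketch modeled on the deleted EC2NodeClassFieldIndexer: the same
// cache.IndexField pattern, keyed on provider ID rather than node class name.
package sketch

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/cache"
	"sigs.k8s.io/controller-runtime/pkg/client"
	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
)

func providerIDFieldIndexer(ctx context.Context) func(cache.Cache) error {
	return func(c cache.Cache) error {
		// Index NodeClaims by the provider ID recorded in status, so a lookup
		// from a Node's spec.providerID becomes a single indexed list call.
		return c.IndexField(ctx, &karpv1.NodeClaim{}, "status.providerID", func(obj client.Object) []string {
			return []string{obj.(*karpv1.NodeClaim).Status.ProviderID}
		})
	}
}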