diff --git a/Makefile b/Makefile index 0a7836ec1889..8b55c2bb71da 100644 --- a/Makefile +++ b/Makefile @@ -116,7 +116,8 @@ coverage: verify: tidy download ## Verify code. Includes dependencies, linting, formatting, etc go generate ./... hack/boilerplate.sh - cp $(KARPENTER_CORE_DIR)/pkg/apis/crds/* pkg/apis/crds + cp $(KARPENTER_CORE_DIR)/pkg/apis/crds/* pkg/apis/crds + hack/validation/requirements.sh $(foreach dir,$(MOD_DIRS),cd $(dir) && golangci-lint run $(newline)) @git diff --quiet ||\ { echo "New file modification detected in the Git working tree. Please check in before commit."; git --no-pager diff --name-only | uniq | awk '{print " - " $$0}'; \ diff --git a/hack/validation/requirements.sh b/hack/validation/requirements.sh new file mode 100755 index 000000000000..b33f9774163b --- /dev/null +++ b/hack/validation/requirements.sh @@ -0,0 +1,12 @@ +# Requirements Validation + +# Adding validation for nodeclaim + +## checking for restricted labels while filtering out well known labels +yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [ + {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml +# # Adding validation for nodepool + +# ## checking for restricted labels while filtering out well known labels +yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [ + {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml index 77ddfd3bf6f8..a152f6787a4f 100644 --- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +++ b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml @@ -182,19 +182,47 @@ spec: key: description: The label key that the selector applies to. type: string + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*(\/))?([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$ + x-kubernetes-validations: + - message: label domain "kubernetes.io" is restricted + rule: self in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/zone", "topology.kubernetes.io/region", "node.kubernetes.io/instance-type", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || self.startsWith("node.kubernetes.io/") || self.startsWith("node-restriction.kubernetes.io/") || !self.find("^([^/]+)").endsWith("kubernetes.io") + - message: label domain "k8s.io" is restricted + rule: self.startsWith("kops.k8s.io/") || !self.find("^([^/]+)").endsWith("k8s.io") + - message: label domain "karpenter.sh" is restricted + rule: self in ["karpenter.sh/capacity-type", "karpenter.sh/nodepool"] || !self.find("^([^/]+)").endsWith("karpenter.sh") + - message: label "kubernetes.io/hostname" is restricted + rule: self != "kubernetes.io/hostname" + - message: label domain "karpenter.k8s.aws" is restricted + rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") operator: description: Represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. type: string + enum: + - In + - NotIn + - Exists + - DoesNotExist + - Gt + - Lt values: description: An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer. This array is replaced during a strategic merge patch. items: type: string type: array + maxLength: 63 + pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$ required: - key - operator type: object + maxItems: 30 type: array + x-kubernetes-validations: + - message: requirements with operator 'In' must have a value defined + rule: 'self.all(x, x.operator == ''In'' ? x.values.size() != 0 : true)' + - message: requirements operator 'Gt' or 'Lt' must have a single positive integer value + rule: 'self.all(x, (x.operator == ''Gt'' || x.operator == ''Lt'') ? (x.values.size() == 1 && int(x.values[0]) >= 0) : true)' resources: description: Resources models the resource requirements for the NodeClaim to launch properties: diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index 086b9e991f7f..4d93d4ed1bf2 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -218,19 +218,49 @@ spec: key: description: The label key that the selector applies to. type: string + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*(\/))?([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$ + x-kubernetes-validations: + - message: label domain "kubernetes.io" is restricted + rule: self in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/zone", "topology.kubernetes.io/region", "node.kubernetes.io/instance-type", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || self.startsWith("node.kubernetes.io/") || self.startsWith("node-restriction.kubernetes.io/") || !self.find("^([^/]+)").endsWith("kubernetes.io") + - message: label domain "k8s.io" is restricted + rule: self.startsWith("kops.k8s.io/") || !self.find("^([^/]+)").endsWith("k8s.io") + - message: label domain "karpenter.sh" is restricted + rule: self in ["karpenter.sh/capacity-type", "karpenter.sh/nodepool"] || !self.find("^([^/]+)").endsWith("karpenter.sh") + - message: label "karpenter.sh/nodepool" is restricted + rule: self != "karpenter.sh/nodepool" + - message: label "kubernetes.io/hostname" is restricted + rule: self != "kubernetes.io/hostname" + - message: label domain "karpenter.k8s.aws" is restricted + rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") operator: description: Represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. type: string + enum: + - In + - NotIn + - Exists + - DoesNotExist + - Gt + - Lt values: description: An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer. This array is replaced during a strategic merge patch. items: type: string type: array + maxLength: 63 + pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$ required: - key - operator type: object + maxItems: 30 type: array + x-kubernetes-validations: + - message: requirements with operator 'In' must have a value defined + rule: 'self.all(x, x.operator == ''In'' ? x.values.size() != 0 : true)' + - message: requirements operator 'Gt' or 'Lt' must have a single positive integer value + rule: 'self.all(x, (x.operator == ''Gt'' || x.operator == ''Lt'') ? (x.values.size() == 1 && int(x.values[0]) >= 0) : true)' resources: description: Resources models the resource requirements for the NodeClaim to launch properties: diff --git a/pkg/apis/v1beta1/labels.go b/pkg/apis/v1beta1/labels.go index 84396f3f4d00..21559932fa3c 100644 --- a/pkg/apis/v1beta1/labels.go +++ b/pkg/apis/v1beta1/labels.go @@ -38,7 +38,6 @@ func init() { LabelInstanceCPU, LabelInstanceMemory, LabelInstanceNetworkBandwidth, - LabelInstancePods, LabelInstanceGPUName, LabelInstanceGPUManufacturer, LabelInstanceGPUCount, @@ -105,7 +104,6 @@ var ( LabelInstanceCPU = Group + "/instance-cpu" LabelInstanceMemory = Group + "/instance-memory" LabelInstanceNetworkBandwidth = Group + "/instance-network-bandwidth" - LabelInstancePods = Group + "/instance-pods" LabelInstanceGPUName = Group + "/instance-gpu-name" LabelInstanceGPUManufacturer = Group + "/instance-gpu-manufacturer" LabelInstanceGPUCount = Group + "/instance-gpu-count" diff --git a/pkg/apis/v1beta1/nodepool_validation_cel_test.go b/pkg/apis/v1beta1/nodepool_validation_cel_test.go new file mode 100644 index 000000000000..62603326c4a5 --- /dev/null +++ b/pkg/apis/v1beta1/nodepool_validation_cel_test.go @@ -0,0 +1,81 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1_test + +import ( + "strings" + + "github.com/Pallinder/go-randomdata" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" + + "github.com/aws/karpenter-core/pkg/apis/v1beta1" +) + +var _ = Describe("CEL/Validation", func() { + var nodePool *v1beta1.NodePool + + BeforeEach(func() { + if env.Version.Minor() < 25 { + Skip("CEL Validation is for 1.25>") + } + nodePool = &v1beta1.NodePool{ + ObjectMeta: metav1.ObjectMeta{Name: strings.ToLower(randomdata.SillyName())}, + Spec: v1beta1.NodePoolSpec{ + Template: v1beta1.NodeClaimTemplate{ + Spec: v1beta1.NodeClaimSpec{ + NodeClassRef: &v1beta1.NodeClassReference{ + Kind: "NodeClaim", + Name: "default", + }, + Requirements: []v1.NodeSelectorRequirement{ + { + Key: v1beta1.CapacityTypeLabelKey, + Operator: v1.NodeSelectorOpExists, + }, + }, + }, + }, + }, + } + }) + It("should allow restricted domains exceptions", func() { + oldNodePool := nodePool.DeepCopy() + for label := range v1beta1.LabelDomainExceptions { + nodePool.Spec.Template.Spec.Requirements = []v1.NodeSelectorRequirement{ + {Key: label + "/test", Operator: v1.NodeSelectorOpIn, Values: []string{"test"}}, + } + Expect(env.Client.Create(ctx, nodePool)).To(Succeed()) + Expect(nodePool.RuntimeValidate()).To(Succeed()) + Expect(env.Client.Delete(ctx, nodePool)).To(Succeed()) + nodePool = oldNodePool.DeepCopy() + } + }) + It("should allow well known label exceptions", func() { + oldNodePool := nodePool.DeepCopy() + for label := range v1beta1.WellKnownLabels.Difference(sets.New(v1beta1.NodePoolLabelKey)) { + nodePool.Spec.Template.Spec.Requirements = []v1.NodeSelectorRequirement{ + {Key: label, Operator: v1.NodeSelectorOpIn, Values: []string{"test"}}, + } + Expect(env.Client.Create(ctx, nodePool)).To(Succeed()) + Expect(nodePool.RuntimeValidate()).To(Succeed()) + Expect(env.Client.Delete(ctx, nodePool)).To(Succeed()) + nodePool = oldNodePool.DeepCopy() + } + }) +}) diff --git a/pkg/cloudprovider/nodeclaim_test.go b/pkg/cloudprovider/nodeclaim_test.go index 99b0851b4ca1..218d358d1e70 100644 --- a/pkg/cloudprovider/nodeclaim_test.go +++ b/pkg/cloudprovider/nodeclaim_test.go @@ -79,7 +79,7 @@ var _ = Describe("NodeClaim/CloudProvider", func() { { Key: v1.LabelInstanceTypeStable, Operator: v1.NodeSelectorOpIn, - Values: []string{}, + Values: []string{"test-instance-type"}, }, } ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim) diff --git a/pkg/providers/instancetype/nodeclass_test.go b/pkg/providers/instancetype/nodeclass_test.go index 5b2d78214679..620a01e03149 100644 --- a/pkg/providers/instancetype/nodeclass_test.go +++ b/pkg/providers/instancetype/nodeclass_test.go @@ -116,7 +116,6 @@ var _ = Describe("NodeClass/InstanceTypes", func() { v1beta1.LabelInstanceCPU: "32", v1beta1.LabelInstanceMemory: "131072", v1beta1.LabelInstanceNetworkBandwidth: "50000", - v1beta1.LabelInstancePods: "58", v1beta1.LabelInstanceGPUName: "t4", v1beta1.LabelInstanceGPUManufacturer: "nvidia", v1beta1.LabelInstanceGPUCount: "1", @@ -169,7 +168,6 @@ var _ = Describe("NodeClass/InstanceTypes", func() { v1beta1.LabelInstanceCPU: "32", v1beta1.LabelInstanceMemory: "131072", v1beta1.LabelInstanceNetworkBandwidth: "50000", - v1beta1.LabelInstancePods: "58", v1beta1.LabelInstanceGPUName: "t4", v1beta1.LabelInstanceGPUManufacturer: "nvidia", v1beta1.LabelInstanceGPUCount: "1", @@ -220,7 +218,6 @@ var _ = Describe("NodeClass/InstanceTypes", func() { v1beta1.LabelInstanceCPU: "8", v1beta1.LabelInstanceMemory: "16384", v1beta1.LabelInstanceNetworkBandwidth: "5000", - v1beta1.LabelInstancePods: "38", v1beta1.LabelInstanceAcceleratorName: "inferentia", v1beta1.LabelInstanceAcceleratorManufacturer: "aws", v1beta1.LabelInstanceAcceleratorCount: "1",