Skip to content

Commit

Permalink
feat: CEL Validation for NodeClaim Requirement (aws#4895)
Browse files Browse the repository at this point in the history
  • Loading branch information
engedaam authored and johngmyers committed May 31, 2024
1 parent 110b6cd commit 79389b0
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 7 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ coverage:
verify: tidy download ## Verify code. Includes dependencies, linting, formatting, etc
go generate ./...
hack/boilerplate.sh
cp $(KARPENTER_CORE_DIR)/pkg/apis/crds/* pkg/apis/crds
cp $(KARPENTER_CORE_DIR)/pkg/apis/crds/* pkg/apis/crds
hack/validation/requirements.sh
$(foreach dir,$(MOD_DIRS),cd $(dir) && golangci-lint run $(newline))
@git diff --quiet ||\
{ echo "New file modification detected in the Git working tree. Please check in before commit."; git --no-pager diff --name-only | uniq | awk '{print " - " $$0}'; \
Expand Down
12 changes: 12 additions & 0 deletions hack/validation/requirements.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Requirements Validation

# Adding validation for nodeclaim

## checking for restricted labels while filtering out well known labels
yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [
{"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml
# # Adding validation for nodepool

# ## checking for restricted labels while filtering out well known labels
yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [
{"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml
28 changes: 28 additions & 0 deletions pkg/apis/crds/karpenter.sh_nodeclaims.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -182,19 +182,47 @@ spec:
key:
description: The label key that the selector applies to.
type: string
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*(\/))?([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$
x-kubernetes-validations:
- message: label domain "kubernetes.io" is restricted
rule: self in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/zone", "topology.kubernetes.io/region", "node.kubernetes.io/instance-type", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || self.startsWith("node.kubernetes.io/") || self.startsWith("node-restriction.kubernetes.io/") || !self.find("^([^/]+)").endsWith("kubernetes.io")
- message: label domain "k8s.io" is restricted
rule: self.startsWith("kops.k8s.io/") || !self.find("^([^/]+)").endsWith("k8s.io")
- message: label domain "karpenter.sh" is restricted
rule: self in ["karpenter.sh/capacity-type", "karpenter.sh/nodepool"] || !self.find("^([^/]+)").endsWith("karpenter.sh")
- message: label "kubernetes.io/hostname" is restricted
rule: self != "kubernetes.io/hostname"
- message: label domain "karpenter.k8s.aws" is restricted
rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
operator:
description: Represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
type: string
enum:
- In
- NotIn
- Exists
- DoesNotExist
- Gt
- Lt
values:
description: An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer. This array is replaced during a strategic merge patch.
items:
type: string
type: array
maxLength: 63
pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
required:
- key
- operator
type: object
maxItems: 30
type: array
x-kubernetes-validations:
- message: requirements with operator 'In' must have a value defined
rule: 'self.all(x, x.operator == ''In'' ? x.values.size() != 0 : true)'
- message: requirements operator 'Gt' or 'Lt' must have a single positive integer value
rule: 'self.all(x, (x.operator == ''Gt'' || x.operator == ''Lt'') ? (x.values.size() == 1 && int(x.values[0]) >= 0) : true)'
resources:
description: Resources models the resource requirements for the NodeClaim to launch
properties:
Expand Down
30 changes: 30 additions & 0 deletions pkg/apis/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -218,19 +218,49 @@ spec:
key:
description: The label key that the selector applies to.
type: string
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*(\/))?([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$
x-kubernetes-validations:
- message: label domain "kubernetes.io" is restricted
rule: self in ["beta.kubernetes.io/instance-type", "failure-domain.beta.kubernetes.io/region", "beta.kubernetes.io/os", "beta.kubernetes.io/arch", "failure-domain.beta.kubernetes.io/zone", "topology.kubernetes.io/zone", "topology.kubernetes.io/region", "node.kubernetes.io/instance-type", "kubernetes.io/arch", "kubernetes.io/os", "node.kubernetes.io/windows-build"] || self.startsWith("node.kubernetes.io/") || self.startsWith("node-restriction.kubernetes.io/") || !self.find("^([^/]+)").endsWith("kubernetes.io")
- message: label domain "k8s.io" is restricted
rule: self.startsWith("kops.k8s.io/") || !self.find("^([^/]+)").endsWith("k8s.io")
- message: label domain "karpenter.sh" is restricted
rule: self in ["karpenter.sh/capacity-type", "karpenter.sh/nodepool"] || !self.find("^([^/]+)").endsWith("karpenter.sh")
- message: label "karpenter.sh/nodepool" is restricted
rule: self != "karpenter.sh/nodepool"
- message: label "kubernetes.io/hostname" is restricted
rule: self != "kubernetes.io/hostname"
- message: label domain "karpenter.k8s.aws" is restricted
rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
operator:
description: Represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
type: string
enum:
- In
- NotIn
- Exists
- DoesNotExist
- Gt
- Lt
values:
description: An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer. This array is replaced during a strategic merge patch.
items:
type: string
type: array
maxLength: 63
pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
required:
- key
- operator
type: object
maxItems: 30
type: array
x-kubernetes-validations:
- message: requirements with operator 'In' must have a value defined
rule: 'self.all(x, x.operator == ''In'' ? x.values.size() != 0 : true)'
- message: requirements operator 'Gt' or 'Lt' must have a single positive integer value
rule: 'self.all(x, (x.operator == ''Gt'' || x.operator == ''Lt'') ? (x.values.size() == 1 && int(x.values[0]) >= 0) : true)'
resources:
description: Resources models the resource requirements for the NodeClaim to launch
properties:
Expand Down
2 changes: 0 additions & 2 deletions pkg/apis/v1beta1/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ func init() {
LabelInstanceCPU,
LabelInstanceMemory,
LabelInstanceNetworkBandwidth,
LabelInstancePods,
LabelInstanceGPUName,
LabelInstanceGPUManufacturer,
LabelInstanceGPUCount,
Expand Down Expand Up @@ -105,7 +104,6 @@ var (
LabelInstanceCPU = Group + "/instance-cpu"
LabelInstanceMemory = Group + "/instance-memory"
LabelInstanceNetworkBandwidth = Group + "/instance-network-bandwidth"
LabelInstancePods = Group + "/instance-pods"
LabelInstanceGPUName = Group + "/instance-gpu-name"
LabelInstanceGPUManufacturer = Group + "/instance-gpu-manufacturer"
LabelInstanceGPUCount = Group + "/instance-gpu-count"
Expand Down
81 changes: 81 additions & 0 deletions pkg/apis/v1beta1/nodepool_validation_cel_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1beta1_test

import (
"strings"

"github.com/Pallinder/go-randomdata"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"

"github.com/aws/karpenter-core/pkg/apis/v1beta1"
)

var _ = Describe("CEL/Validation", func() {
var nodePool *v1beta1.NodePool

BeforeEach(func() {
if env.Version.Minor() < 25 {
Skip("CEL Validation is for 1.25>")
}
nodePool = &v1beta1.NodePool{
ObjectMeta: metav1.ObjectMeta{Name: strings.ToLower(randomdata.SillyName())},
Spec: v1beta1.NodePoolSpec{
Template: v1beta1.NodeClaimTemplate{
Spec: v1beta1.NodeClaimSpec{
NodeClassRef: &v1beta1.NodeClassReference{
Kind: "NodeClaim",
Name: "default",
},
Requirements: []v1.NodeSelectorRequirement{
{
Key: v1beta1.CapacityTypeLabelKey,
Operator: v1.NodeSelectorOpExists,
},
},
},
},
},
}
})
It("should allow restricted domains exceptions", func() {
oldNodePool := nodePool.DeepCopy()
for label := range v1beta1.LabelDomainExceptions {
nodePool.Spec.Template.Spec.Requirements = []v1.NodeSelectorRequirement{
{Key: label + "/test", Operator: v1.NodeSelectorOpIn, Values: []string{"test"}},
}
Expect(env.Client.Create(ctx, nodePool)).To(Succeed())
Expect(nodePool.RuntimeValidate()).To(Succeed())
Expect(env.Client.Delete(ctx, nodePool)).To(Succeed())
nodePool = oldNodePool.DeepCopy()
}
})
It("should allow well known label exceptions", func() {
oldNodePool := nodePool.DeepCopy()
for label := range v1beta1.WellKnownLabels.Difference(sets.New(v1beta1.NodePoolLabelKey)) {
nodePool.Spec.Template.Spec.Requirements = []v1.NodeSelectorRequirement{
{Key: label, Operator: v1.NodeSelectorOpIn, Values: []string{"test"}},
}
Expect(env.Client.Create(ctx, nodePool)).To(Succeed())
Expect(nodePool.RuntimeValidate()).To(Succeed())
Expect(env.Client.Delete(ctx, nodePool)).To(Succeed())
nodePool = oldNodePool.DeepCopy()
}
})
})
2 changes: 1 addition & 1 deletion pkg/cloudprovider/nodeclaim_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ var _ = Describe("NodeClaim/CloudProvider", func() {
{
Key: v1.LabelInstanceTypeStable,
Operator: v1.NodeSelectorOpIn,
Values: []string{},
Values: []string{"test-instance-type"},
},
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim)
Expand Down
3 changes: 0 additions & 3 deletions pkg/providers/instancetype/nodeclass_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ var _ = Describe("NodeClass/InstanceTypes", func() {
v1beta1.LabelInstanceCPU: "32",
v1beta1.LabelInstanceMemory: "131072",
v1beta1.LabelInstanceNetworkBandwidth: "50000",
v1beta1.LabelInstancePods: "58",
v1beta1.LabelInstanceGPUName: "t4",
v1beta1.LabelInstanceGPUManufacturer: "nvidia",
v1beta1.LabelInstanceGPUCount: "1",
Expand Down Expand Up @@ -169,7 +168,6 @@ var _ = Describe("NodeClass/InstanceTypes", func() {
v1beta1.LabelInstanceCPU: "32",
v1beta1.LabelInstanceMemory: "131072",
v1beta1.LabelInstanceNetworkBandwidth: "50000",
v1beta1.LabelInstancePods: "58",
v1beta1.LabelInstanceGPUName: "t4",
v1beta1.LabelInstanceGPUManufacturer: "nvidia",
v1beta1.LabelInstanceGPUCount: "1",
Expand Down Expand Up @@ -220,7 +218,6 @@ var _ = Describe("NodeClass/InstanceTypes", func() {
v1beta1.LabelInstanceCPU: "8",
v1beta1.LabelInstanceMemory: "16384",
v1beta1.LabelInstanceNetworkBandwidth: "5000",
v1beta1.LabelInstancePods: "38",
v1beta1.LabelInstanceAcceleratorName: "inferentia",
v1beta1.LabelInstanceAcceleratorManufacturer: "aws",
v1beta1.LabelInstanceAcceleratorCount: "1",
Expand Down

0 comments on commit 79389b0

Please sign in to comment.