From 1112aea53748f4aa0801ba8f4a05fe9b6fc76f46 Mon Sep 17 00:00:00 2001 From: Jonathan Innis Date: Sun, 15 Sep 2024 21:38:22 -0700 Subject: [PATCH] Fix race condition with using newer NodePool for hash annotation --- .../karpenter.kwok.sh_kwoknodeclasses.yaml | 2 +- kwok/charts/crds/karpenter.sh_nodeclaims.yaml | 2 +- kwok/charts/crds/karpenter.sh_nodepools.yaml | 2 +- pkg/apis/crds/karpenter.sh_nodeclaims.yaml | 2 +- pkg/apis/crds/karpenter.sh_nodepools.yaml | 2 +- pkg/controllers/provisioning/provisioner.go | 2 +- .../scheduling/nodeclaimtemplate.go | 25 ++++++++------- pkg/controllers/provisioning/suite_test.go | 32 +++++++++++++++++++ .../karpenter.test.sh_testnodeclasses.yaml | 2 +- 9 files changed, 53 insertions(+), 18 deletions(-) diff --git a/kwok/apis/crds/karpenter.kwok.sh_kwoknodeclasses.yaml b/kwok/apis/crds/karpenter.kwok.sh_kwoknodeclasses.yaml index 79b9db76fc..c1657ab40a 100644 --- a/kwok/apis/crds/karpenter.kwok.sh_kwoknodeclasses.yaml +++ b/kwok/apis/crds/karpenter.kwok.sh_kwoknodeclasses.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.2 + controller-gen.kubebuilder.io/version: v0.16.3 name: kwoknodeclasses.karpenter.kwok.sh spec: group: karpenter.kwok.sh diff --git a/kwok/charts/crds/karpenter.sh_nodeclaims.yaml b/kwok/charts/crds/karpenter.sh_nodeclaims.yaml index adbc5b4471..9494280de5 100644 --- a/kwok/charts/crds/karpenter.sh_nodeclaims.yaml +++ b/kwok/charts/crds/karpenter.sh_nodeclaims.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.2 + controller-gen.kubebuilder.io/version: v0.16.3 name: nodeclaims.karpenter.sh spec: group: karpenter.sh diff --git a/kwok/charts/crds/karpenter.sh_nodepools.yaml b/kwok/charts/crds/karpenter.sh_nodepools.yaml index 434c596165..8cd3b43747 100644 --- a/kwok/charts/crds/karpenter.sh_nodepools.yaml +++ b/kwok/charts/crds/karpenter.sh_nodepools.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.2 + controller-gen.kubebuilder.io/version: v0.16.3 name: nodepools.karpenter.sh spec: group: karpenter.sh diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml index 2a008c6112..e70b6e2af7 100644 --- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +++ b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.2 + controller-gen.kubebuilder.io/version: v0.16.3 name: nodeclaims.karpenter.sh spec: group: karpenter.sh diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index 5cf036d643..a22d8befeb 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.2 + controller-gen.kubebuilder.io/version: v0.16.3 name: nodepools.karpenter.sh spec: group: karpenter.sh diff --git a/pkg/controllers/provisioning/provisioner.go b/pkg/controllers/provisioning/provisioner.go index aafd3bcf68..ab56bdc13f 100644 --- a/pkg/controllers/provisioning/provisioner.go +++ b/pkg/controllers/provisioning/provisioner.go @@ -361,7 +361,7 @@ func (p *Provisioner) Create(ctx context.Context, n *scheduler.NodeClaim, opts . if err := latest.Spec.Limits.ExceededBy(latest.Status.Resources); err != nil { return "", err } - nodeClaim := n.ToNodeClaim(latest) + nodeClaim := n.ToNodeClaim() if err := p.kubeClient.Create(ctx, nodeClaim); err != nil { return "", err diff --git a/pkg/controllers/provisioning/scheduling/nodeclaimtemplate.go b/pkg/controllers/provisioning/scheduling/nodeclaimtemplate.go index 73e462076f..379bec28ce 100644 --- a/pkg/controllers/provisioning/scheduling/nodeclaimtemplate.go +++ b/pkg/controllers/provisioning/scheduling/nodeclaimtemplate.go @@ -23,6 +23,7 @@ import ( "github.com/samber/lo" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" v1 "sigs.k8s.io/karpenter/pkg/apis/v1" "sigs.k8s.io/karpenter/pkg/cloudprovider" @@ -40,6 +41,7 @@ type NodeClaimTemplate struct { v1.NodeClaim NodePoolName string + NodePoolUUID types.UID InstanceTypeOptions cloudprovider.InstanceTypes Requirements scheduling.Requirements } @@ -48,36 +50,37 @@ func NewNodeClaimTemplate(nodePool *v1.NodePool) *NodeClaimTemplate { nct := &NodeClaimTemplate{ NodeClaim: *nodePool.Spec.Template.ToNodeClaim(), NodePoolName: nodePool.Name, + NodePoolUUID: nodePool.UID, Requirements: scheduling.NewRequirements(), } + nct.Annotations = lo.Assign(nct.Annotations, map[string]string{ + v1.NodePoolHashAnnotationKey: nodePool.Hash(), + v1.NodePoolHashVersionAnnotationKey: v1.NodePoolHashVersion, + }) nct.Labels = lo.Assign(nct.Labels, map[string]string{v1.NodePoolLabelKey: nodePool.Name}) nct.Requirements.Add(scheduling.NewNodeSelectorRequirementsWithMinValues(nct.Spec.Requirements...).Values()...) nct.Requirements.Add(scheduling.NewLabelRequirements(nct.Labels).Values()...) return nct } -func (i *NodeClaimTemplate) ToNodeClaim(nodePool *v1.NodePool) *v1.NodeClaim { +func (i *NodeClaimTemplate) ToNodeClaim() *v1.NodeClaim { // Order the instance types by price and only take the first 100 of them to decrease the instance type size in the requirements instanceTypes := lo.Slice(i.InstanceTypeOptions.OrderByPrice(i.Requirements), 0, MaxInstanceTypes) i.Requirements.Add(scheduling.NewRequirementWithFlexibility(corev1.LabelInstanceTypeStable, corev1.NodeSelectorOpIn, i.Requirements.Get(corev1.LabelInstanceTypeStable).MinValues, lo.Map(instanceTypes, func(i *cloudprovider.InstanceType, _ int) string { return i.Name })...)) - gvk := object.GVK(nodePool) nc := &v1.NodeClaim{ ObjectMeta: metav1.ObjectMeta{ GenerateName: fmt.Sprintf("%s-", i.NodePoolName), - Annotations: lo.Assign(i.Annotations, map[string]string{ - v1.NodePoolHashAnnotationKey: nodePool.Hash(), - v1.NodePoolHashVersionAnnotationKey: v1.NodePoolHashVersion, - }), - Labels: i.Labels, + Annotations: i.Annotations, + Labels: i.Labels, OwnerReferences: []metav1.OwnerReference{ { - APIVersion: gvk.GroupVersion().String(), - Kind: gvk.Kind, - Name: nodePool.Name, - UID: nodePool.UID, + APIVersion: object.GVK(&v1.NodePool{}).GroupVersion().String(), + Kind: object.GVK(&v1.NodePool{}).Kind, + Name: i.NodePoolName, + UID: i.NodePoolUUID, BlockOwnerDeletion: lo.ToPtr(true), }, }, diff --git a/pkg/controllers/provisioning/suite_test.go b/pkg/controllers/provisioning/suite_test.go index 80c7e3d81e..dec8bbb662 100644 --- a/pkg/controllers/provisioning/suite_test.go +++ b/pkg/controllers/provisioning/suite_test.go @@ -29,6 +29,7 @@ import ( storagev1 "k8s.io/api/storage/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" clock "k8s.io/utils/clock/testing" @@ -246,6 +247,37 @@ var _ = Describe("Provisioning", func() { ExpectScheduled(ctx, env.Client, pod) } }) + It("should not use a different NodePool hash on the NodeClaim if the NodePool changes during scheduling", func() { + // This test was added since we were generating the NodeClaim's NodePool hash from a NodePool that was re-retrieved + // after scheduling had been completed. This could have resulted in the hash not accurately reflecting the actual NodePool + // state that it was generated from + + nodePool := test.NodePool() + pod := test.UnschedulablePod() + hash := nodePool.Hash() + ExpectApplied(ctx, env.Client, nodePool, pod) + + results, err := prov.Schedule(ctx) + Expect(err).ToNot(HaveOccurred()) + + Expect(results.NewNodeClaims).To(HaveLen(1)) + Expect(results.NewNodeClaims[0].ToNodeClaim().Annotations).To(HaveKeyWithValue(v1.NodePoolHashAnnotationKey, hash)) + + nodePool.Spec.Template.Labels = lo.Assign(nodePool.Spec.Template.Labels, map[string]string{ + "new-label": "new-value", + }) + Expect(nodePool.Hash()).ToNot(Equal(hash)) + ExpectApplied(ctx, env.Client, nodePool) // apply the changed NodePool but expect no change in the hash on the NodeClaim + + nodeClaims, err := prov.CreateNodeClaims(ctx, results.NewNodeClaims) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeClaims).To(HaveLen(1)) + + nodeClaim := &v1.NodeClaim{} + Expect(env.Client.Get(ctx, types.NamespacedName{Name: nodeClaims[0]}, nodeClaim)).To(Succeed()) + + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.NodePoolHashAnnotationKey, hash)) + }) It("should schedule all pods on one inflight node when node is in deleting state", func() { nodePool := test.NodePool() its, err := cloudProvider.GetInstanceTypes(ctx, nodePool) diff --git a/pkg/test/v1alpha1/crds/karpenter.test.sh_testnodeclasses.yaml b/pkg/test/v1alpha1/crds/karpenter.test.sh_testnodeclasses.yaml index fb3c15c292..b8684d22e0 100644 --- a/pkg/test/v1alpha1/crds/karpenter.test.sh_testnodeclasses.yaml +++ b/pkg/test/v1alpha1/crds/karpenter.test.sh_testnodeclasses.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.2 + controller-gen.kubebuilder.io/version: v0.16.3 name: testnodeclasses.karpenter.test.sh spec: group: karpenter.test.sh