From 2294120cf9372a43877e7cfb0637c67247cfaa75 Mon Sep 17 00:00:00 2001 From: Amanuel Engeda Date: Wed, 18 Dec 2024 15:13:52 -0800 Subject: [PATCH] Add Support for Node Monitoring Agent --- .../templates/karpenter.sh_nodeclaims.yaml | 4 +++ go.mod | 4 +-- go.sum | 8 ++--- pkg/apis/crds/karpenter.sh_nodeclaims.yaml | 4 +++ pkg/cloudprovider/cloudprovider.go | 29 ++++++++++++++++++- test/suites/integration/repair_policy_test.go | 27 +++++++++++++++++ 6 files changed, 69 insertions(+), 7 deletions(-) diff --git a/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml b/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml index 487f12a9af1f..01531fea5da8 100644 --- a/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml +++ b/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml @@ -38,6 +38,10 @@ spec: - jsonPath: .metadata.creationTimestamp name: Age type: date + - jsonPath: .status.imageID + name: ImageID + priority: 1 + type: string - jsonPath: .status.providerID name: ID priority: 1 diff --git a/go.mod b/go.mod index 74dab8736af0..99806846d8b2 100644 --- a/go.mod +++ b/go.mod @@ -43,7 +43,7 @@ require ( k8s.io/klog/v2 v2.130.1 k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 sigs.k8s.io/controller-runtime v0.19.3 - sigs.k8s.io/karpenter v1.1.1 + sigs.k8s.io/karpenter v1.1.2-0.20241220005608-b3fa6ebffc19 sigs.k8s.io/yaml v1.4.0 ) @@ -103,7 +103,7 @@ require ( golang.org/x/oauth2 v0.23.0 // indirect golang.org/x/sys v0.26.0 // indirect golang.org/x/term v0.25.0 // indirect - golang.org/x/text v0.20.0 // indirect + golang.org/x/text v0.21.0 // indirect golang.org/x/time v0.8.0 // indirect golang.org/x/tools v0.26.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect diff --git a/go.sum b/go.sum index 5df86c3e428f..8e3af7af7927 100644 --- a/go.sum +++ b/go.sum @@ -257,8 +257,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= -golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -314,8 +314,8 @@ sigs.k8s.io/controller-runtime v0.19.3 h1:XO2GvC9OPftRst6xWCpTgBZO04S2cbp0Qqkj8b sigs.k8s.io/controller-runtime v0.19.3/go.mod h1:j4j87DqtsThvwTv5/Tc5NFRyyF/RF0ip4+62tbTSIUM= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= -sigs.k8s.io/karpenter v1.1.1 h1:QPpVC8DsaLgJ/YWcFpZKE4m3jD+Qp88/GtSPvMfffck= -sigs.k8s.io/karpenter v1.1.1/go.mod h1:NQouOJNK6s1d4EIKa5cY7nAV3IG74qZ6gPzHBeCZNPw= +sigs.k8s.io/karpenter v1.1.2-0.20241220005608-b3fa6ebffc19 h1:nCaZE6O7772FEEPGgTef05IanE8AWMKf7DBh1LiU1ik= +sigs.k8s.io/karpenter v1.1.2-0.20241220005608-b3fa6ebffc19/go.mod h1:E1mtCutIoQJA05ClYYQo9y+5ujk6U2FxByauGSUXXZs= sigs.k8s.io/structured-merge-diff/v4 v4.4.2 h1:MdmvkGuXi/8io6ixD5wud3vOLwc1rj0aNqRlpuvjmwA= sigs.k8s.io/structured-merge-diff/v4 v4.4.2/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml index bfe259dea177..759903b3233e 100644 --- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +++ 
b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml @@ -35,6 +35,10 @@ spec: - jsonPath: .metadata.creationTimestamp name: Age type: date + - jsonPath: .status.imageID + name: ImageID + priority: 1 + type: string - jsonPath: .status.providerID name: ID priority: 1 diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index 3fe6b6f2706c..1169b1a3a6b0 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -259,7 +259,7 @@ func getTags(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1. func (c *CloudProvider) RepairPolicies() []cloudprovider.RepairPolicy { return []cloudprovider.RepairPolicy{ - // Supported Kubelet fields + // Supported Kubelet Node Conditions { ConditionType: corev1.NodeReady, ConditionStatus: corev1.ConditionFalse, @@ -270,6 +270,33 @@ func (c *CloudProvider) RepairPolicies() []cloudprovider.RepairPolicy { ConditionStatus: corev1.ConditionUnknown, TolerationDuration: 30 * time.Minute, }, + // Supported Node Monitoring Agent Conditions + // + { + ConditionType: "AcceleratedHardwareReady", + ConditionStatus: corev1.ConditionFalse, + TolerationDuration: 10 * time.Minute, + }, + { + ConditionType: "StorageReady", + ConditionStatus: corev1.ConditionFalse, + TolerationDuration: 30 * time.Minute, + }, + { + ConditionType: "NetworkingReady", + ConditionStatus: corev1.ConditionFalse, + TolerationDuration: 30 * time.Minute, + }, + { + ConditionType: "KernelReady", + ConditionStatus: corev1.ConditionFalse, + TolerationDuration: 30 * time.Minute, + }, + { + ConditionType: "ContainerRuntimeReady", + ConditionStatus: corev1.ConditionFalse, + TolerationDuration: 30 * time.Minute, + }, } } diff --git a/test/suites/integration/repair_policy_test.go b/test/suites/integration/repair_policy_test.go index 56d464d972b8..6a91ed940ed0 100644 --- a/test/suites/integration/repair_policy_test.go +++ b/test/suites/integration/repair_policy_test.go @@ -73,6 +73,7 @@ var _ = Describe("Repair Policy", 
func() { env.EventuallyExpectNotFound(pod, node) env.EventuallyExpectHealthyPodCount(selector, numPods) }, + // Kubelet Supported Conditions Entry("Node Ready False", corev1.NodeCondition{ Type: corev1.NodeReady, Status: corev1.ConditionFalse, @@ -83,6 +84,32 @@ var _ = Describe("Repair Policy", func() { Status: corev1.ConditionUnknown, LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)}, }), + // Node Monitoring Agent Supported Conditions + Entry("Node AcceleratedHardwareReady False", corev1.NodeCondition{ + Type: "AcceleratedHardwareReady", + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-11 * time.Minute)}, + }), + Entry("Node StorageReady False", corev1.NodeCondition{ + Type: "StorageReady", + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)}, + }), + Entry("Node NetworkingReady False", corev1.NodeCondition{ + Type: "NetworkingReady", + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)}, + }), + Entry("Node KernelReady False", corev1.NodeCondition{ + Type: "KernelReady", + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)}, + }), + Entry("Node ContainerRuntimeReady False", corev1.NodeCondition{ + Type: "ContainerRuntimeReady", + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)}, + }), ) It("should ignore disruption budgets", func() { nodePool.Spec.Disruption.Budgets = []karpenterv1.Budget{