diff --git a/Makefile b/Makefile
index c4400d87b8d3..0ea4e9247aa9 100644
--- a/Makefile
+++ b/Makefile
@@ -18,6 +18,7 @@ HELM_OPTS ?= --set serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn=${K
 	--set controller.resources.limits.cpu=1 \
 	--set controller.resources.limits.memory=1Gi \
 	--set settings.featureGates.spotToSpotConsolidation=true \
+	--set settings.featureGates.nodeRepair=true \
 	--create-namespace
 
 # CR for local builds of Karpenter
diff --git a/charts/karpenter/templates/deployment.yaml b/charts/karpenter/templates/deployment.yaml
index 952976cab027..4f0ea6b27576 100644
--- a/charts/karpenter/templates/deployment.yaml
+++ b/charts/karpenter/templates/deployment.yaml
@@ -103,7 +103,7 @@ spec:
               divisor: "0"
               resource: limits.memory
         - name: FEATURE_GATES
-          value: "SpotToSpotConsolidation={{ .Values.settings.featureGates.spotToSpotConsolidation }}"
+          value: "SpotToSpotConsolidation={{ .Values.settings.featureGates.spotToSpotConsolidation }},NodeRepair={{ .Values.settings.featureGates.nodeRepair }}"
         {{- with .Values.settings.batchMaxDuration }}
         - name: BATCH_MAX_DURATION
           value: "{{ . }}"
diff --git a/charts/karpenter/values.yaml b/charts/karpenter/values.yaml
index c865a2e9bf13..a6f7f38b71e7 100644
--- a/charts/karpenter/values.yaml
+++ b/charts/karpenter/values.yaml
@@ -181,3 +181,6 @@ settings:
   # -- spotToSpotConsolidation is ALPHA and is disabled by default.
   # Setting this to true will enable spot replacement consolidation for both single and multi-node consolidation.
   spotToSpotConsolidation: false
+  # -- nodeRepair is ALPHA and is disabled by default.
+  # Setting this to true will enable node repair.
+  nodeRepair: false
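Note on the wire format: the controller receives these gates as a single comma-separated FEATURE_GATES string, as the deployment.yaml change above shows. Karpenter core owns the real parsing in its options handling; the sketch below only illustrates the string format, and the helper name is our own, not an upstream API.

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseFeatureGates parses a FEATURE_GATES-style value such as
// "SpotToSpotConsolidation=false,NodeRepair=true" into a map.
// Illustrative only: karpenter core performs its own parsing.
func parseFeatureGates(s string) (map[string]bool, error) {
	gates := map[string]bool{}
	for _, pair := range strings.Split(s, ",") {
		name, value, ok := strings.Cut(pair, "=")
		if !ok {
			return nil, fmt.Errorf("malformed feature gate %q", pair)
		}
		enabled, err := strconv.ParseBool(value)
		if err != nil {
			return nil, fmt.Errorf("feature gate %q: %w", name, err)
		}
		gates[strings.TrimSpace(name)] = enabled
	}
	return gates, nil
}

func main() {
	gates, err := parseFeatureGates("SpotToSpotConsolidation=false,NodeRepair=true")
	if err != nil {
		panic(err)
	}
	fmt.Println(gates["NodeRepair"]) // true
}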
diff --git a/cmd/controller/main.go b/cmd/controller/main.go
index 7ae7ae9d8034..119d5296210d 100644
--- a/cmd/controller/main.go
+++ b/cmd/controller/main.go
@@ -39,6 +39,7 @@ func main() {
 	op.
 		WithControllers(ctx, corecontrollers.NewControllers(
+			ctx,
 			op.Manager,
 			op.Clock,
 			op.GetClient(),
diff --git a/go.mod b/go.mod
index 90432816931d..015189985783 100644
--- a/go.mod
+++ b/go.mod
@@ -42,7 +42,7 @@ require (
 	k8s.io/klog/v2 v2.130.1
 	k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
 	sigs.k8s.io/controller-runtime v0.19.1
-	sigs.k8s.io/karpenter v1.0.1-0.20241115180652-995040b20d0a
+	sigs.k8s.io/karpenter v1.0.1-0.20241119070053-a03600e47f44
 	sigs.k8s.io/yaml v1.4.0
 )
diff --git a/go.sum b/go.sum
index 2c93999fa463..172ae95736e2 100644
--- a/go.sum
+++ b/go.sum
@@ -323,8 +323,8 @@ sigs.k8s.io/controller-runtime v0.19.1 h1:Son+Q40+Be3QWb+niBXAg2vFiYWolDjjRfO8hn
 sigs.k8s.io/controller-runtime v0.19.1/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
-sigs.k8s.io/karpenter v1.0.1-0.20241115180652-995040b20d0a h1:fbD7tYsZCIu3uzgJKO9/XWS6uYPCeWxQmwAvP2jtXRE=
-sigs.k8s.io/karpenter v1.0.1-0.20241115180652-995040b20d0a/go.mod h1:zolnK/3MxqSPEhEan2VBbzuGdReJPFTbpYWGivwTgic=
+sigs.k8s.io/karpenter v1.0.1-0.20241119070053-a03600e47f44 h1:s8UHRXbVIfVXoyYG/er0rQaMY75wZYWlgzI/Tlzp68U=
+sigs.k8s.io/karpenter v1.0.1-0.20241119070053-a03600e47f44/go.mod h1:zolnK/3MxqSPEhEan2VBbzuGdReJPFTbpYWGivwTgic=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
 sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go
index 9c02442ccbbd..3faeba065877 100644
--- a/pkg/cloudprovider/cloudprovider.go
+++ b/pkg/cloudprovider/cloudprovider.go
@@ -246,6 +246,27 @@ func getTags(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.
 	}), staticTags)
 }
 
+func (c *CloudProvider) RepairPolicies() []cloudprovider.RepairPolicy {
+	return []cloudprovider.RepairPolicy{
+		// Supported Kubelet fields
+		{
+			ConditionType:      corev1.NodeReady,
+			ConditionStatus:    corev1.ConditionFalse,
+			TolerationDuration: 30 * time.Minute,
+		},
+		{
+			ConditionType:      corev1.NodeDiskPressure,
+			ConditionStatus:    corev1.ConditionTrue,
+			TolerationDuration: 30 * time.Minute,
+		},
+		{
+			ConditionType:      corev1.NodeMemoryPressure,
+			ConditionStatus:    corev1.ConditionTrue,
+			TolerationDuration: 30 * time.Minute,
+		},
+	}
+}
+
 func (c *CloudProvider) resolveNodeClassFromNodeClaim(ctx context.Context, nodeClaim *karpv1.NodeClaim) (*v1.EC2NodeClass, error) {
 	nodeClass := &v1.EC2NodeClass{}
 	if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Spec.NodeClassRef.Name}, nodeClass); err != nil {
diff --git a/pkg/fake/cloudprovider.go b/pkg/fake/cloudprovider.go
index 75d9ba516725..d21cd3d89bdb 100644
--- a/pkg/fake/cloudprovider.go
+++ b/pkg/fake/cloudprovider.go
@@ -89,3 +89,7 @@ func (c *CloudProvider) Name() string {
 func (c *CloudProvider) GetSupportedNodeClasses() []status.Object {
 	return []status.Object{&v1.EC2NodeClass{}}
 }
+
+func (c *CloudProvider) RepairPolicies() []corecloudprovider.RepairPolicy {
+	return []corecloudprovider.RepairPolicy{}
+}
diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go
index 4112b24c24dd..d3d8acbc2b4a 100644
--- a/test/pkg/environment/common/expectations.go
+++ b/test/pkg/environment/common/expectations.go
@@ -89,6 +89,36 @@ func (env *Environment) ExpectUpdated(objects ...client.Object) {
 	}
 }
 
+// ExpectStatusUpdated will update objects in the cluster to match the inputs.
+// WARNING: This ignores the resource version check, which can result in
+// overwriting changes made by other controllers in the cluster.
+// This is useful in ensuring that we can clean up resources by patching
+// out finalizers.
+// Grab the object before making the updates to reduce the chance of this race.
+func (env *Environment) ExpectStatusUpdated(objects ...client.Object) {
+	GinkgoHelper()
+	for _, o := range objects {
+		Eventually(func(g Gomega) {
+			current := o.DeepCopyObject().(client.Object)
+			g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(current), current)).To(Succeed())
+			if current.GetResourceVersion() != o.GetResourceVersion() {
+				log.FromContext(env).Info(fmt.Sprintf("detected an update to an object (%s) with an outdated resource version, did you get the latest version of the object before patching?", lo.Must(apiutil.GVKForObject(o, env.Client.Scheme()))))
+			}
+			o.SetResourceVersion(current.GetResourceVersion())
+			g.Expect(env.Client.Status().Update(env.Context, o)).To(Succeed())
+		}).WithTimeout(time.Second * 10).Should(Succeed())
+	}
+}
+
+func ReplaceNodeConditions(node *corev1.Node, conds ...corev1.NodeCondition) *corev1.Node {
+	keys := sets.New[string](lo.Map(conds, func(c corev1.NodeCondition, _ int) string { return string(c.Type) })...)
+	node.Status.Conditions = lo.Reject(node.Status.Conditions, func(c corev1.NodeCondition, _ int) bool {
+		return keys.Has(string(c.Type))
+	})
+	node.Status.Conditions = append(node.Status.Conditions, conds...)
+	return node
+}
+
 // ExpectCreatedOrUpdated can update objects in the cluster to match the inputs.
 // WARNING: ExpectUpdated ignores the resource version check, which can result in
 // overwriting changes made by other controllers in the cluster.
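These three policies are what karpenter core's node-repair controller (behind the NodeRepair gate) consults: a node only becomes eligible for forced replacement once a matching condition has persisted for the full TolerationDuration. A minimal sketch of that eligibility check, assuming a helper name of our own choosing rather than the upstream implementation:

package repair

import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/utils/clock"
	"sigs.k8s.io/karpenter/pkg/cloudprovider"
)

// repairEligible reports whether any repair policy matches one of the node's
// current conditions and that condition has been in the unhealthy state for
// at least the policy's toleration window (30 minutes for the policies above).
// Illustrative sketch only; the real check lives in karpenter core.
func repairEligible(node *corev1.Node, policies []cloudprovider.RepairPolicy, clk clock.PassiveClock) bool {
	for _, policy := range policies {
		for _, cond := range node.Status.Conditions {
			if cond.Type != policy.ConditionType || cond.Status != policy.ConditionStatus {
				continue
			}
			if clk.Since(cond.LastTransitionTime.Time) >= policy.TolerationDuration {
				return true
			}
		}
	}
	return false
}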
diff --git a/test/suites/integration/repair_policy_test.go b/test/suites/integration/repair_policy_test.go
new file mode 100644
index 000000000000..8836ec30f0ad
--- /dev/null
+++ b/test/suites/integration/repair_policy_test.go
@@ -0,0 +1,137 @@
+/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package integration_test
+
+import (
+	"time"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	karpenterv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
+	coretest "sigs.k8s.io/karpenter/pkg/test"
+
+	"github.com/aws/karpenter-provider-aws/test/pkg/environment/common"
+
+	. "github.com/onsi/ginkgo/v2"
+	"github.com/samber/lo"
+)
+
+var _ = Describe("Repair Policy", func() {
+	var selector labels.Selector
+	var dep *appsv1.Deployment
+	var numPods int
+	var unhealthyCondition corev1.NodeCondition
+
+	BeforeEach(func() {
+		unhealthyCondition = corev1.NodeCondition{
+			Type:               corev1.NodeReady,
+			Status:             corev1.ConditionFalse,
+			LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
+		}
+		numPods = 1
+		// Add pods with a do-not-disrupt annotation so that we can check node metadata before we disrupt
+		dep = coretest.Deployment(coretest.DeploymentOptions{
+			Replicas: int32(numPods),
+			PodOptions: coretest.PodOptions{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						"app": "my-app",
+					},
+					Annotations: map[string]string{
+						karpenterv1.DoNotDisruptAnnotationKey: "true",
+					},
+				},
+				TerminationGracePeriodSeconds: lo.ToPtr[int64](0),
+			},
+		})
+		selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels)
+	})
+
+	DescribeTable("Conditions", func(unhealthyCondition corev1.NodeCondition) {
+		env.ExpectCreated(nodeClass, nodePool, dep)
+		pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0]
+		node := env.ExpectCreatedNodeCount("==", 1)[0]
+		env.EventuallyExpectInitializedNodeCount("==", 1)
+
+		node = common.ReplaceNodeConditions(node, unhealthyCondition)
+		env.ExpectStatusUpdated(node)
+
+		env.EventuallyExpectNotFound(pod, node)
+		env.EventuallyExpectHealthyPodCount(selector, numPods)
+	},
+		Entry("Readiness", corev1.NodeCondition{
+			Type:               corev1.NodeReady,
+			Status:             corev1.ConditionFalse,
+			LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
+		}),
+		Entry("DiskPressure", corev1.NodeCondition{
+			Type:               corev1.NodeDiskPressure,
+			Status:             corev1.ConditionTrue,
+			LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
+		}),
+		Entry("MemoryPressure", corev1.NodeCondition{
+			Type:               corev1.NodeMemoryPressure,
+			Status:             corev1.ConditionTrue,
+			LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
+		}),
+	)
+	It("should ignore disruption budgets", func() {
+		nodePool.Spec.Disruption.Budgets = []karpenterv1.Budget{
+			{
+				Nodes: "0",
+			},
+		}
+		env.ExpectCreated(nodeClass, nodePool, dep)
+		pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0]
+		node := env.ExpectCreatedNodeCount("==", 1)[0]
+		env.EventuallyExpectInitializedNodeCount("==", 1)
+
+		node = common.ReplaceNodeConditions(node, unhealthyCondition)
+		env.ExpectStatusUpdated(node)
+
+		env.EventuallyExpectNotFound(pod, node)
+		env.EventuallyExpectHealthyPodCount(selector, numPods)
+	})
+	It("should ignore do-not-disrupt annotation on node", func() {
+		env.ExpectCreated(nodeClass, nodePool, dep)
+		pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0]
+		node := env.ExpectCreatedNodeCount("==", 1)[0]
+		env.EventuallyExpectInitializedNodeCount("==", 1)
+
+		node.Annotations[karpenterv1.DoNotDisruptAnnotationKey] = "true"
+		env.ExpectUpdated(node)
+
+		node = common.ReplaceNodeConditions(node, unhealthyCondition)
+		env.ExpectStatusUpdated(node)
+
+		env.EventuallyExpectNotFound(pod, node)
+		env.EventuallyExpectHealthyPodCount(selector, numPods)
+	})
+	It("should ignore terminationGracePeriod on the nodepool", func() {
+		nodePool.Spec.Template.Spec.TerminationGracePeriod = &metav1.Duration{Duration: time.Hour}
+		env.ExpectCreated(nodeClass, nodePool, dep)
+		pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0]
+		node := env.ExpectCreatedNodeCount("==", 1)[0]
+		env.EventuallyExpectInitializedNodeCount("==", 1)
+
+		node = common.ReplaceNodeConditions(node, unhealthyCondition)
+		env.ExpectStatusUpdated(node)
+
+		env.EventuallyExpectNotFound(pod, node)
+		env.EventuallyExpectHealthyPodCount(selector, numPods)
+	})
+})
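A note on the -31 * time.Minute backdating used throughout these tests: the repair policies added above tolerate an unhealthy condition for 30 minutes, so setting LastTransitionTime just past that window makes the node immediately actionable instead of forcing the suite to wait out the full duration. The arithmetic in isolation, as a standalone sketch:

package main

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// Mirrors the condition the tests inject: NotReady, backdated 31 minutes.
	cond := corev1.NodeCondition{
		Type:               corev1.NodeReady,
		Status:             corev1.ConditionFalse,
		LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
	}
	// 31m elapsed >= 30m toleration, so repair may act without further waiting.
	fmt.Println(time.Since(cond.LastTransitionTime.Time) >= 30*time.Minute) // true
}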