From 10ae4ef32c62f93aeab148fddd76b5d039abfc69 Mon Sep 17 00:00:00 2001 From: Ellis Tarn Date: Mon, 2 Aug 2021 15:26:40 -0700 Subject: [PATCH] Refactored expiration controller into node controller --- cmd/controller/main.go | 4 - pkg/apis/provisioning/v1alpha3/provisioner.go | 3 +- .../v1alpha3/provisioner_validation.go | 1 - .../v1alpha3/provisioner_validation_test.go | 1 - pkg/cloudprovider/aws/instance.go | 2 +- pkg/controllers/allocation/controller.go | 2 +- pkg/controllers/expiration/controller.go | 119 -------- pkg/controllers/expiration/suite_test.go | 115 ------- pkg/controllers/node/controller.go | 77 ++++- pkg/controllers/node/emptiness.go | 91 ++++++ pkg/controllers/node/expiration.go | 53 ++++ pkg/controllers/node/finalizer.go | 15 +- pkg/controllers/node/liveness.go | 47 +++ pkg/controllers/node/readiness.go | 15 +- pkg/controllers/node/suite_test.go | 285 ++++++++++++++---- pkg/controllers/reallocation/controller.go | 109 ------- pkg/controllers/reallocation/suite_test.go | 213 ------------- pkg/controllers/reallocation/utilization.go | 163 ---------- pkg/test/nodes.go | 2 +- pkg/utils/functional/functional.go | 26 ++ pkg/utils/pod/scheduling.go | 13 - 21 files changed, 535 insertions(+), 821 deletions(-) delete mode 100644 pkg/controllers/expiration/controller.go delete mode 100644 pkg/controllers/expiration/suite_test.go create mode 100644 pkg/controllers/node/emptiness.go create mode 100644 pkg/controllers/node/expiration.go create mode 100644 pkg/controllers/node/liveness.go delete mode 100644 pkg/controllers/reallocation/controller.go delete mode 100644 pkg/controllers/reallocation/suite_test.go delete mode 100644 pkg/controllers/reallocation/utilization.go diff --git a/cmd/controller/main.go b/cmd/controller/main.go index fe9e8941071c..cd4aac145901 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -24,9 +24,7 @@ import ( "github.com/awslabs/karpenter/pkg/cloudprovider/registry" "github.com/awslabs/karpenter/pkg/controllers" "github.com/awslabs/karpenter/pkg/controllers/allocation" - "github.com/awslabs/karpenter/pkg/controllers/expiration" "github.com/awslabs/karpenter/pkg/controllers/node" - "github.com/awslabs/karpenter/pkg/controllers/reallocation" "github.com/awslabs/karpenter/pkg/controllers/termination" "github.com/go-logr/zapr" "k8s.io/apimachinery/pkg/runtime" @@ -82,9 +80,7 @@ func main() { HealthProbeBindAddress: fmt.Sprintf(":%d", options.HealthProbePort), }) if err := manager.RegisterControllers(ctx, - expiration.NewController(manager.GetClient()), allocation.NewController(manager.GetClient(), clientSet.CoreV1(), cloudProvider), - reallocation.NewController(manager.GetClient(), cloudProvider), termination.NewController(ctx, manager.GetClient(), clientSet.CoreV1(), cloudProvider), node.NewController(manager.GetClient()), ).Start(ctx); err != nil { diff --git a/pkg/apis/provisioning/v1alpha3/provisioner.go b/pkg/apis/provisioning/v1alpha3/provisioner.go index 2a836d0475b7..8189f56c62d3 100644 --- a/pkg/apis/provisioning/v1alpha3/provisioner.go +++ b/pkg/apis/provisioning/v1alpha3/provisioner.go @@ -121,8 +121,7 @@ var ( NotReadyTaintKey = SchemeGroupVersion.Group + "/not-ready" // Reserved labels - ProvisionerNameLabelKey = SchemeGroupVersion.Group + "/provisioner-name" - ProvisionerUnderutilizedLabelKey = SchemeGroupVersion.Group + "/underutilized" + ProvisionerNameLabelKey = SchemeGroupVersion.Group + "/provisioner-name" // Reserved annotations KarpenterDoNotEvictPodAnnotation = 
SchemeGroupVersion.Group + "/do-not-evict" diff --git a/pkg/apis/provisioning/v1alpha3/provisioner_validation.go b/pkg/apis/provisioning/v1alpha3/provisioner_validation.go index c07665df153d..49c2b95bd6f3 100644 --- a/pkg/apis/provisioning/v1alpha3/provisioner_validation.go +++ b/pkg/apis/provisioning/v1alpha3/provisioner_validation.go @@ -32,7 +32,6 @@ var ( ArchitectureLabelKey, OperatingSystemLabelKey, ProvisionerNameLabelKey, - ProvisionerUnderutilizedLabelKey, ProvisionerTTLAfterEmptyKey, ZoneLabelKey, InstanceTypeLabelKey, diff --git a/pkg/apis/provisioning/v1alpha3/provisioner_validation_test.go b/pkg/apis/provisioning/v1alpha3/provisioner_validation_test.go index 6756a5b3dc4f..2e7f130aa261 100644 --- a/pkg/apis/provisioning/v1alpha3/provisioner_validation_test.go +++ b/pkg/apis/provisioning/v1alpha3/provisioner_validation_test.go @@ -91,7 +91,6 @@ var _ = Describe("Validation", func() { ArchitectureLabelKey, OperatingSystemLabelKey, ProvisionerNameLabelKey, - ProvisionerUnderutilizedLabelKey, ZoneLabelKey, InstanceTypeLabelKey, } { diff --git a/pkg/cloudprovider/aws/instance.go b/pkg/cloudprovider/aws/instance.go index f1da2e095c58..a8a36f419695 100644 --- a/pkg/cloudprovider/aws/instance.go +++ b/pkg/cloudprovider/aws/instance.go @@ -178,7 +178,7 @@ func (p *InstanceProvider) getInstance(ctx context.Context, id *string, instance } *instance = *describeInstancesOutput.Reservations[0].Instances[0] if len(aws.StringValue(instance.PrivateDnsName)) == 0 { - return fmt.Errorf("expected PrivateDnsName to be set") + return fmt.Errorf("got instance %s but PrivateDnsName was not set", aws.StringValue(instance.InstanceId)) } return nil } diff --git a/pkg/controllers/allocation/controller.go b/pkg/controllers/allocation/controller.go index acda6749214f..39b1c90af93f 100644 --- a/pkg/controllers/allocation/controller.go +++ b/pkg/controllers/allocation/controller.go @@ -85,7 +85,7 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco logging.FromContext(ctx).Errorf("Provisioner \"%s\" not found. Create the \"default\" provisioner or specify an alternative using the nodeSelector %s", req.Name, v1alpha3.ProvisionerNameLabelKey) return reconcile.Result{}, nil } - return reconcile.Result{}, err + return result.RetryIfError(ctx, err) } // 2. Wait on a pod batch diff --git a/pkg/controllers/expiration/controller.go b/pkg/controllers/expiration/controller.go deleted file mode 100644 index 81c67db28313..000000000000 --- a/pkg/controllers/expiration/controller.go +++ /dev/null @@ -1,119 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package expiration - -import ( - "context" - "fmt" - "time" - - "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/types" - "knative.dev/pkg/logging" - "knative.dev/pkg/ptr" - - controllerruntime "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/reconcile" - "sigs.k8s.io/controller-runtime/pkg/source" -) - -// Controller for the resource -type Controller struct { - kubeClient client.Client -} - -// NewController constructs a controller instance -func NewController(kubeClient client.Client) *Controller { - return &Controller{ - kubeClient: kubeClient, - } -} - -// Reconcile executes an expiration control loop for a node -func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).Named("Expiration")) - // 1. Get node - node := &v1.Node{} - if err := c.kubeClient.Get(ctx, req.NamespacedName, node); err != nil { - if errors.IsNotFound(err) { - return reconcile.Result{}, nil - } - return reconcile.Result{}, err - } - // 2. Ignore if node is already deleting - if !node.DeletionTimestamp.IsZero() { - return reconcile.Result{}, nil - } - // 3. Ignore if provisioner doesn't exist - name, ok := node.Labels[v1alpha3.ProvisionerNameLabelKey] - if !ok { - return reconcile.Result{}, nil - } - provisioner := &v1alpha3.Provisioner{} - if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: name}, provisioner); err != nil { - if errors.IsNotFound(err) { - return reconcile.Result{}, nil - } - return reconcile.Result{}, err - } - // 4. Ignore if TTLSecondsUntilExpired isn't defined - if provisioner.Spec.TTLSecondsUntilExpired == nil { - return reconcile.Result{}, nil - } - // 5. Trigger termination workflow if expired - expirationTTL := time.Duration(ptr.Int64Value(provisioner.Spec.TTLSecondsUntilExpired)) * time.Second - expirationTime := node.CreationTimestamp.Add(expirationTTL) - if time.Now().After(expirationTime) { - logging.FromContext(ctx).Infof("Triggering termination for expired node %s after %s (+%s)", node.Name, expirationTTL, time.Since(expirationTime)) - if err := c.kubeClient.Delete(ctx, node); err != nil { - return reconcile.Result{}, fmt.Errorf("expiring node %s, %w", node.Name, err) - } - return reconcile.Result{}, nil - } - - // 6. Backoff until expired - return reconcile.Result{RequeueAfter: time.Until(expirationTime)}, nil -} - -func (c *Controller) provisionerToNodes(ctx context.Context, o client.Object) (requests []reconcile.Request) { - nodes := &v1.NodeList{} - if err := c.kubeClient.List(ctx, nodes, client.MatchingLabels(map[string]string{ - v1alpha3.ProvisionerNameLabelKey: o.GetName(), - })); err != nil { - logging.FromContext(ctx).Errorf("Failed to list nodes when mapping expiration watch events, %s", err.Error()) - } - for _, node := range nodes.Items { - requests = append(requests, reconcile.Request{NamespacedName: types.NamespacedName{Name: node.Name}}) - } - return requests -} - -func (c *Controller) Register(ctx context.Context, m manager.Manager) error { - return controllerruntime. - NewControllerManagedBy(m). - Named("Expiration"). - For(&v1.Node{}). - Watches( - // Reconcile all nodes related to a provisioner when it changes. 
- &source.Kind{Type: &v1alpha3.Provisioner{}}, - handler.EnqueueRequestsFromMapFunc(func(o client.Object) (requests []reconcile.Request) { return c.provisionerToNodes(ctx, o) }), - ). - Complete(c) -} diff --git a/pkg/controllers/expiration/suite_test.go b/pkg/controllers/expiration/suite_test.go deleted file mode 100644 index 97e6462cffa0..000000000000 --- a/pkg/controllers/expiration/suite_test.go +++ /dev/null @@ -1,115 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package expiration_test - -import ( - "context" - "testing" - - "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" - "github.com/awslabs/karpenter/pkg/controllers/expiration" - "github.com/awslabs/karpenter/pkg/test" - . "github.com/awslabs/karpenter/pkg/test/expectations" - . "github.com/onsi/ginkgo" - . "github.com/onsi/gomega" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - . "knative.dev/pkg/logging/testing" - "knative.dev/pkg/ptr" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -var ctx context.Context -var controller *expiration.Controller -var env *test.Environment - -func TestAPIs(t *testing.T) { - ctx = TestContextWithLogger(t) - RegisterFailHandler(Fail) - RunSpecs(t, "Expiration") -} - -var _ = BeforeSuite(func() { - env = test.NewEnvironment(ctx, func(e *test.Environment) { - controller = expiration.NewController(e.Client) - }) - Expect(env.Start()).To(Succeed(), "Failed to start environment") -}) - -var _ = AfterSuite(func() { - Expect(env.Stop()).To(Succeed(), "Failed to stop environment") -}) - -var _ = Describe("Reconciliation", func() { - var provisioner *v1alpha3.Provisioner - - BeforeEach(func() { - provisioner = &v1alpha3.Provisioner{ - ObjectMeta: metav1.ObjectMeta{Name: v1alpha3.DefaultProvisioner.Name}, - Spec: v1alpha3.ProvisionerSpec{ - Cluster: v1alpha3.Cluster{Name: ptr.String("test-cluster"), Endpoint: "http://test-cluster", CABundle: ptr.String("dGVzdC1jbHVzdGVyCg==")}, - TTLSecondsUntilExpired: ptr.Int64(30), - }, - } - }) - - AfterEach(func() { - ExpectCleanedUp(env.Client) - }) - It("should ignore nodes without TTLSecondsUntilExpired", func() { - node := test.Node(test.NodeOptions{ - Labels: map[string]string{ - v1alpha3.ProvisionerNameLabelKey: provisioner.Name, - }, - }) - provisioner.Spec.TTLSecondsUntilExpired = nil - ExpectCreated(env.Client, provisioner, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) - - node = ExpectNodeExists(env.Client, node.Name) - Expect(node.DeletionTimestamp.IsZero()).To(BeTrue()) - }) - It("should ignore nodes without a provisioner", func() { - node := test.Node(test.NodeOptions{}) - ExpectCreated(env.Client, provisioner, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) - - node = ExpectNodeExists(env.Client, node.Name) - Expect(node.DeletionTimestamp.IsZero()).To(BeTrue()) - }) - It("should not terminate nodes before expiry", func() { - node := test.Node(test.NodeOptions{ - Labels: map[string]string{ - 
v1alpha3.ProvisionerNameLabelKey: provisioner.Name, - }, - }) - ExpectCreated(env.Client, provisioner, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) - - node = ExpectNodeExists(env.Client, node.Name) - Expect(node.DeletionTimestamp.IsZero()).To(BeTrue()) - }) - It("should terminate nodes after expiry", func() { - provisioner.Spec.TTLSecondsUntilExpired = ptr.Int64(0) - node := test.Node(test.NodeOptions{ - Labels: map[string]string{ - v1alpha3.ProvisionerNameLabelKey: provisioner.Name, - }, - }) - ExpectCreated(env.Client, provisioner, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) - - ExpectNotFound(env.Client, node) - }) -}) diff --git a/pkg/controllers/node/controller.go b/pkg/controllers/node/controller.go index 904dc4c5c728..ad9102ddef7a 100644 --- a/pkg/controllers/node/controller.go +++ b/pkg/controllers/node/controller.go @@ -17,26 +17,34 @@ package node import ( "context" "fmt" - "reflect" + "time" "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" + "github.com/awslabs/karpenter/pkg/utils/functional" "github.com/awslabs/karpenter/pkg/utils/result" "go.uber.org/multierr" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" "knative.dev/pkg/logging" controllerruntime "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" ) // NewController constructs a controller instance func NewController(kubeClient client.Client) *Controller { return &Controller{ kubeClient: kubeClient, + liveness: &Liveness{kubeClient: kubeClient}, + emptiness: &Emptiness{kubeClient: kubeClient}, + expiration: &Expiration{kubeClient: kubeClient}, } } @@ -45,50 +53,97 @@ func NewController(kubeClient client.Client) *Controller { type Controller struct { kubeClient client.Client readiness *Readiness + liveness *Liveness + emptiness *Emptiness + expiration *Expiration finalizer *Finalizer } // Reconcile executes a control loop for the resource func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { ctx = logging.WithLogger(ctx, logging.FromContext(ctx).Named("Node")) - // 1. Retrieve node from reconcile request + // 1. Retrieve Node, ignore if not provisioned or terminating stored := &v1.Node{} if err := c.kubeClient.Get(ctx, req.NamespacedName, stored); err != nil { if errors.IsNotFound(err) { return reconcile.Result{}, nil } - return reconcile.Result{}, err + return result.RetryIfError(ctx, err) } if _, ok := stored.Labels[v1alpha3.ProvisionerNameLabelKey]; !ok { return reconcile.Result{}, nil } + if !stored.DeletionTimestamp.IsZero() { + return reconcile.Result{}, nil + } - // 2. Execute node reconcilers + // 2. Retrieve Provisioner + provisioner := &v1alpha3.Provisioner{} + if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: stored.Labels[v1alpha3.ProvisionerNameLabelKey]}, provisioner); err != nil { + return result.RetryIfError(ctx, err) + } + + // 3. 
Execute reconcilers node := stored.DeepCopy() + var backoffs []time.Duration var errs error for _, reconciler := range []interface { - Reconcile(*v1.Node) error + Reconcile(context.Context, *v1alpha3.Provisioner, *v1.Node) (reconcile.Result, error) }{ c.readiness, + c.liveness, + c.expiration, + c.emptiness, c.finalizer, } { - errs = multierr.Append(errs, reconciler.Reconcile(node)) + res, err := reconciler.Reconcile(ctx, provisioner, node) + errs = multierr.Append(errs, err) + backoffs = append(backoffs, res.RequeueAfter) } - // 3. Patch any changes, regardless of errors - if !reflect.DeepEqual(node, stored) { + // 4. Patch any changes, regardless of errors + if !equality.Semantic.DeepEqual(node, stored) { if err := c.kubeClient.Patch(ctx, node, client.MergeFrom(stored)); err != nil { return reconcile.Result{}, fmt.Errorf("patching node %s, %w", node.Name, err) } } - return result.RetryIfError(ctx, errs) + // 5. Requeue if error or if RequeueAfter is set + if errs != nil { + return result.RetryIfError(ctx, errs) + } + return reconcile.Result{RequeueAfter: functional.MaxDuration(backoffs...)}, nil } -func (c *Controller) Register(_ context.Context, m manager.Manager) error { +func (c *Controller) Register(ctx context.Context, m manager.Manager) error { return controllerruntime. NewControllerManagedBy(m). Named("Node"). For(&v1.Node{}). + Watches( + // Reconcile all nodes related to a provisioner when it changes. + &source.Kind{Type: &v1alpha3.Provisioner{}}, + handler.EnqueueRequestsFromMapFunc(func(o client.Object) (requests []reconcile.Request) { + nodes := &v1.NodeList{} + if err := c.kubeClient.List(ctx, nodes, client.MatchingLabels(map[string]string{v1alpha3.ProvisionerNameLabelKey: o.GetName()})); err != nil { + logging.FromContext(ctx).Errorf("Failed to list nodes when mapping provisioner watch events, %s", err.Error()) + return requests + } + for _, node := range nodes.Items { + requests = append(requests, reconcile.Request{NamespacedName: types.NamespacedName{Name: node.Name}}) + } + return requests + }), + ). + Watches( + // Reconcile node when a pod assigned to it changes. + &source.Kind{Type: &v1.Pod{}}, + handler.EnqueueRequestsFromMapFunc(func(o client.Object) (requests []reconcile.Request) { + if name := o.(*v1.Pod).Spec.NodeName; name != "" { + requests = append(requests, reconcile.Request{NamespacedName: types.NamespacedName{Name: name}}) + } + return requests + }), + ). WithOptions(controller.Options{MaxConcurrentReconciles: 10}). Complete(c) } diff --git a/pkg/controllers/node/emptiness.go b/pkg/controllers/node/emptiness.go new file mode 100644 index 000000000000..2e23773c5363 --- /dev/null +++ b/pkg/controllers/node/emptiness.go @@ -0,0 +1,91 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package node + +import ( + "context" + "fmt" + "time" + + "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" + "github.com/awslabs/karpenter/pkg/utils/functional" + "github.com/awslabs/karpenter/pkg/utils/node" + "github.com/awslabs/karpenter/pkg/utils/pod" + "github.com/awslabs/karpenter/pkg/utils/ptr" + v1 "k8s.io/api/core/v1" + "knative.dev/pkg/logging" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +// Emptiness is a subreconciler that terminates nodes that are empty after a ttl +type Emptiness struct { + kubeClient client.Client +} + +// Reconcile reconciles the node +func (r *Emptiness) Reconcile(ctx context.Context, provisioner *v1alpha3.Provisioner, n *v1.Node) (reconcile.Result, error) { + // 1. Ignore node if not applicable + if provisioner.Spec.TTLSecondsAfterEmpty == nil { + return reconcile.Result{}, nil + } + if !node.IsReady(n) { + return reconcile.Result{}, nil + } + // 2. Remove ttl if not empty + empty, err := r.isEmpty(ctx, n) + if err != nil { + return reconcile.Result{}, err + } + if !empty { + if _, ok := n.Annotations[v1alpha3.ProvisionerTTLAfterEmptyKey]; ok { + delete(n.Annotations, v1alpha3.ProvisionerTTLAfterEmptyKey) + logging.FromContext(ctx).Infof("Removed emptiness TTL from node %s", n.Name) + } + return reconcile.Result{}, nil + } + // 3. Set TTL if not set + n.Annotations = functional.UnionStringMaps(n.Annotations) + if _, ok := n.Annotations[v1alpha3.ProvisionerTTLAfterEmptyKey]; !ok { + ttl := time.Duration(ptr.Int64Value(provisioner.Spec.TTLSecondsAfterEmpty)) * time.Second + n.Annotations[v1alpha3.ProvisionerTTLAfterEmptyKey] = time.Now().Add(ttl).Format(time.RFC3339) + logging.FromContext(ctx).Infof("Added TTL (+%s) to empty node %s", ttl, n.Name) + return reconcile.Result{RequeueAfter: ttl}, nil + } + // 4. Delete node if beyond TTL + if node.IsPastEmptyTTL(n) { + logging.FromContext(ctx).Infof("Triggering termination for empty node %s", n.Name) + if err := r.kubeClient.Delete(ctx, n); err != nil { + return reconcile.Result{}, fmt.Errorf("deleting node %s, %w", n.Name, err) + } + } + return reconcile.Result{}, nil +} + +func (r *Emptiness) isEmpty(ctx context.Context, n *v1.Node) (bool, error) { + pods := &v1.PodList{} + if err := r.kubeClient.List(ctx, pods, client.MatchingFields{"spec.nodeName": n.Name}); err != nil { + return false, fmt.Errorf("listing pods for node %s, %w", n.Name, err) + } + for _, p := range pods.Items { + if pod.HasFailed(&p) { + continue + } + if !pod.IsOwnedByDaemonSet(&p) { + return false, nil + } + } + return true, nil +} diff --git a/pkg/controllers/node/expiration.go b/pkg/controllers/node/expiration.go new file mode 100644 index 000000000000..4f7ce3ebb860 --- /dev/null +++ b/pkg/controllers/node/expiration.go @@ -0,0 +1,53 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package node + +import ( + "context" + "fmt" + "time" + + "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" + "github.com/awslabs/karpenter/pkg/utils/ptr" + v1 "k8s.io/api/core/v1" + "knative.dev/pkg/logging" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +// Expiration is a subreconciler that terminates nodes after a period of time. +type Expiration struct { + kubeClient client.Client +} + +// Reconcile reconciles the node +func (r *Expiration) Reconcile(ctx context.Context, provisioner *v1alpha3.Provisioner, node *v1.Node) (reconcile.Result, error) { + // 1. Ignore if TTLSecondsUntilExpired isn't defined + if provisioner.Spec.TTLSecondsUntilExpired == nil { + return reconcile.Result{}, nil + } + // 2. Trigger termination workflow if expired + now := time.Now() + expirationTTL := time.Duration(ptr.Int64Value(provisioner.Spec.TTLSecondsUntilExpired)) * time.Second + expirationTime := node.CreationTimestamp.Add(expirationTTL) + if now.After(expirationTime) { + logging.FromContext(ctx).Infof("Triggering termination for expired node %s after %s (+%s)", node.Name, expirationTTL, time.Since(expirationTime)) + if err := r.kubeClient.Delete(ctx, node); err != nil { + return reconcile.Result{}, fmt.Errorf("deleting node, %w", err) + } + } + // 3. Backoff until expired + return reconcile.Result{RequeueAfter: time.Until(expirationTime)}, nil +} diff --git a/pkg/controllers/node/finalizer.go b/pkg/controllers/node/finalizer.go index 18abbd9d6f62..06a048029f95 100644 --- a/pkg/controllers/node/finalizer.go +++ b/pkg/controllers/node/finalizer.go @@ -15,24 +15,27 @@ limitations under the License. package node import ( + "context" + "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" "github.com/awslabs/karpenter/pkg/utils/functional" v1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) -// Finalizer is a tiny reconciler that ensures nodes have the termination +// Finalizer is a subreconciler that ensures nodes have the termination // finalizer. This protects against instances that launch when Karpenter fails // to create the node object. In this case, the node will come online without // the termination finalizer. This controller will update the node accordingly. -type Finalizer struct {} +type Finalizer struct{} -// Reconcile adds the termination finalizer if the node is not deleting -func (r *Finalizer) Reconcile(n *v1.Node) error { +// Reconcile reconciles the node +func (r *Finalizer) Reconcile(ctx context.Context, provisioner *v1alpha3.Provisioner, n *v1.Node) (reconcile.Result, error) { if !n.DeletionTimestamp.IsZero() { - return nil + return reconcile.Result{}, nil } if !functional.ContainsString(n.Finalizers, v1alpha3.TerminationFinalizer) { n.Finalizers = append(n.Finalizers, v1alpha3.TerminationFinalizer) } - return nil + return reconcile.Result{}, nil } diff --git a/pkg/controllers/node/liveness.go b/pkg/controllers/node/liveness.go new file mode 100644 index 000000000000..d664a7b4e668 --- /dev/null +++ b/pkg/controllers/node/liveness.go @@ -0,0 +1,47 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package node + +import ( + "context" + "fmt" + "time" + + "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" + "github.com/awslabs/karpenter/pkg/utils/node" + v1 "k8s.io/api/core/v1" + "knative.dev/pkg/logging" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +const LivenessTimeout = 5 * time.Minute + +// Liveness is a subreconciler that deletes a node if it's determined to be unrecoverable +type Liveness struct { + kubeClient client.Client } + +// Reconcile reconciles the node +func (r *Liveness) Reconcile(ctx context.Context, provisioner *v1alpha3.Provisioner, n *v1.Node) (reconcile.Result, error) { + if !node.FailedToJoin(n, LivenessTimeout) { + return reconcile.Result{}, nil + } + logging.FromContext(ctx).Infof("Triggering termination for node that failed to join %s", n.Name) + if err := r.kubeClient.Delete(ctx, n); err != nil { + return reconcile.Result{}, fmt.Errorf("deleting node %s, %w", n.Name, err) + } + return reconcile.Result{}, nil +} diff --git a/pkg/controllers/node/readiness.go b/pkg/controllers/node/readiness.go index 8b72f0fa20e9..200f5344a0e0 100644 --- a/pkg/controllers/node/readiness.go +++ b/pkg/controllers/node/readiness.go @@ -15,18 +15,21 @@ limitations under the License. package node import ( + "context" + "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" "github.com/awslabs/karpenter/pkg/utils/node" v1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) -// Readiness is a tiny reconciler that removes the readiness taint when the node is ready -type Readiness struct {} +// Readiness is a subreconciler that removes the NotReady taint when the node is ready +type Readiness struct{} -// Reconcile removes the NotReady taint when the node is ready -func (r *Readiness) Reconcile(n *v1.Node) error { +// Reconcile reconciles the node +func (r *Readiness) Reconcile(ctx context.Context, provisioner *v1alpha3.Provisioner, n *v1.Node) (reconcile.Result, error) { if !node.IsReady(n) { - return nil + return reconcile.Result{}, nil } taints := []v1.Taint{} for _, taint := range n.Spec.Taints { @@ -35,5 +38,5 @@ func (r *Readiness) Reconcile(n *v1.Node) error { } } n.Spec.Taints = taints - return nil + return reconcile.Result{}, nil } diff --git a/pkg/controllers/node/suite_test.go b/pkg/controllers/node/suite_test.go index 840abf0c964e..030f51b54ce6 100644 --- a/pkg/controllers/node/suite_test.go +++ b/pkg/controllers/node/suite_test.go @@ -16,8 +16,11 @@ package node_test import ( "context" + "strings" "testing" + "time" + "bou.ke/monkey" "github.com/Pallinder/go-randomdata" "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" "github.com/awslabs/karpenter/pkg/controllers/node" @@ -27,7 +30,9 @@ import ( . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" . 
"knative.dev/pkg/logging/testing" + "knative.dev/pkg/ptr" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -53,122 +58,292 @@ var _ = AfterSuite(func() { }) var _ = Describe("Controller", func() { + var provisioner *v1alpha3.Provisioner + BeforeEach(func() { + provisioner = &v1alpha3.Provisioner{ + ObjectMeta: metav1.ObjectMeta{Name: v1alpha3.DefaultProvisioner.Name}, + Spec: v1alpha3.ProvisionerSpec{}, + } + }) + AfterEach(func() { ExpectCleanedUp(env.Client) }) + Context("Expiration", func() { + It("should ignore nodes without TTLSecondsUntilExpired", func() { + n := test.Node(test.NodeOptions{ + Finalizers: []string{v1alpha3.TerminationFinalizer}, + Labels: map[string]string{ + v1alpha3.ProvisionerNameLabelKey: provisioner.Name, + }, + }) + ExpectCreated(env.Client, provisioner, n) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) + + n = ExpectNodeExists(env.Client, n.Name) + Expect(n.DeletionTimestamp.IsZero()).To(BeTrue()) + }) + It("should ignore nodes without a provisioner", func() { + n := test.Node(test.NodeOptions{Finalizers: []string{v1alpha3.TerminationFinalizer}}) + ExpectCreated(env.Client, provisioner, n) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) + + n = ExpectNodeExists(env.Client, n.Name) + Expect(n.DeletionTimestamp.IsZero()).To(BeTrue()) + }) + It("should terminate nodes after expiry", func() { + provisioner.Spec.TTLSecondsUntilExpired = ptr.Int64(30) + n := test.Node(test.NodeOptions{ + Finalizers: []string{v1alpha3.TerminationFinalizer}, + Labels: map[string]string{ + v1alpha3.ProvisionerNameLabelKey: provisioner.Name, + }, + }) + ExpectCreated(env.Client, provisioner, n) + + // Should still exist + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) + n = ExpectNodeExists(env.Client, n.Name) + Expect(n.DeletionTimestamp.IsZero()).To(BeTrue()) + + // Simulate time passing + afterExpiry := time.Now().Add(time.Duration(*provisioner.Spec.TTLSecondsUntilExpired) * time.Second) + monkey.Patch(time.Now, func() time.Time { + return afterExpiry + }) + defer func() { monkey.Unpatch(time.Now) }() + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) + n = ExpectNodeExists(env.Client, n.Name) + Expect(n.DeletionTimestamp.IsZero()).To(BeFalse()) + }) + }) + Context("Readiness", func() { It("should not remove the readiness taint if not ready", func() { - node := test.Node(test.NodeOptions{ + n := test.Node(test.NodeOptions{ ReadyStatus: v1.ConditionUnknown, - Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: randomdata.SillyName()}, + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, Taints: []v1.Taint{ {Key: v1alpha3.NotReadyTaintKey, Effect: v1.TaintEffectNoSchedule}, {Key: randomdata.SillyName(), Effect: v1.TaintEffectNoSchedule}, }, }) - ExpectCreatedWithStatus(env.Client, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) + ExpectCreated(env.Client, provisioner) + ExpectCreatedWithStatus(env.Client, n) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - Expect(updatedNode.Spec.Taints).To(Equal(node.Spec.Taints)) + n = ExpectNodeExists(env.Client, n.Name) + Expect(n.Spec.Taints).To(Equal(n.Spec.Taints)) }) It("should remove the readiness taint if ready", func() { - node := test.Node(test.NodeOptions{ + n := test.Node(test.NodeOptions{ ReadyStatus: v1.ConditionTrue, 
- Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: randomdata.SillyName()}, + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, Taints: []v1.Taint{ {Key: v1alpha3.NotReadyTaintKey, Effect: v1.TaintEffectNoSchedule}, {Key: randomdata.SillyName(), Effect: v1.TaintEffectNoSchedule}, }, }) - ExpectCreatedWithStatus(env.Client, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) + ExpectCreated(env.Client, provisioner) + ExpectCreatedWithStatus(env.Client, n) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - Expect(updatedNode.Spec.Taints).ToNot(Equal([]v1.Taint{node.Spec.Taints[1]})) + updated := ExpectNodeExists(env.Client, n.Name) + Expect(updated.Spec.Taints).To(Equal([]v1.Taint{n.Spec.Taints[1]})) }) It("should do nothing if ready and the readiness taint does not exist", func() { - node := test.Node(test.NodeOptions{ + n := test.Node(test.NodeOptions{ ReadyStatus: v1.ConditionTrue, - Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: randomdata.SillyName()}, - Taints: []v1.Taint{ - {Key: randomdata.SillyName(), Effect: v1.TaintEffectNoSchedule}, - }, + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, + Taints: []v1.Taint{{Key: randomdata.SillyName(), Effect: v1.TaintEffectNoSchedule}}, }) - ExpectCreatedWithStatus(env.Client, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) + ExpectCreated(env.Client, provisioner) + ExpectCreatedWithStatus(env.Client, n) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - Expect(updatedNode.Spec.Taints).To(Equal(node.Spec.Taints)) + updated := ExpectNodeExists(env.Client, n.Name) + Expect(updated.Spec.Taints).To(Equal(n.Spec.Taints)) }) It("should do nothing if not owned by a provisioner", func() { - node := test.Node(test.NodeOptions{ + n := test.Node(test.NodeOptions{ ReadyStatus: v1.ConditionTrue, Taints: []v1.Taint{ {Key: v1alpha3.NotReadyTaintKey, Effect: v1.TaintEffectNoSchedule}, {Key: randomdata.SillyName(), Effect: v1.TaintEffectNoSchedule}, }, }) - ExpectCreatedWithStatus(env.Client, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) + ExpectCreated(env.Client, provisioner) + ExpectCreatedWithStatus(env.Client, n) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - Expect(updatedNode.Spec.Taints).To(Equal(node.Spec.Taints)) + updated := ExpectNodeExists(env.Client, n.Name) + Expect(updated.Spec.Taints).To(Equal(n.Spec.Taints)) }) }) - Context("Finalizer", func() { - It("should add the termination finalizer if missing", func() { + Context("Liveness", func() { + It("should terminate nodes that fail to join after 5 minutes", func() { + n := test.Node(test.NodeOptions{ + Finalizers: []string{v1alpha3.TerminationFinalizer}, + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, + ReadyStatus: v1.ConditionUnknown, + }) + pod := test.Pod(test.PodOptions{NodeName: n.Name}) + ExpectCreated(env.Client, provisioner, pod) + ExpectCreatedWithStatus(env.Client, n) + + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) + + // Expect node 
not deleted + n = ExpectNodeExists(env.Client, n.Name) + Expect(n.DeletionTimestamp.IsZero()).To(BeTrue()) + + // Set pod DeletionTimestamp and do another reconcile + Expect(env.Client.Delete(ctx, pod)).To(Succeed()) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) + + // Expect node not deleted + n = ExpectNodeExists(env.Client, n.Name) + Expect(n.DeletionTimestamp.IsZero()).To(BeTrue()) + + // Simulate time passing and a node failing to join + future := time.Now().Add(node.LivenessTimeout) + monkey.Patch(time.Now, func() time.Time { + return future + }) + defer func() { monkey.Unpatch(time.Now) }() + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) + + n = ExpectNodeExists(env.Client, n.Name) + Expect(n.DeletionTimestamp.IsZero()).To(BeFalse()) + }) + }) + Describe("Emptiness", func() { + It("should not TTL nodes that have ready status unknown", func() { + provisioner.Spec.TTLSecondsAfterEmpty = ptr.Int64(30) node := test.Node(test.NodeOptions{ - Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: randomdata.SillyName()}, - Finalizers: []string{"fake.com/finalizer"}, + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, + ReadyStatus: v1.ConditionUnknown, }) + + ExpectCreated(env.Client, provisioner) ExpectCreatedWithStatus(env.Client, node) ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - Expect(updatedNode.Finalizers).To(ConsistOf(node.Finalizers[0], v1alpha3.TerminationFinalizer)) + node = ExpectNodeExists(env.Client, node.Name) + Expect(node.Annotations).ToNot(HaveKey(v1alpha3.ProvisionerTTLAfterEmptyKey)) }) - It("should do nothing if terminating", func() { + It("should not TTL nodes that have ready status false", func() { + provisioner.Spec.TTLSecondsAfterEmpty = ptr.Int64(30) node := test.Node(test.NodeOptions{ - Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: randomdata.SillyName()}, - Finalizers: []string{"fake.com/finalizer"}, + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, + ReadyStatus: v1.ConditionFalse, }) + + ExpectCreated(env.Client, provisioner) ExpectCreatedWithStatus(env.Client, node) - Expect(env.Client.Delete(ctx, node)).To(Succeed()) ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - Expect(updatedNode.Finalizers).To(Equal(node.Finalizers)) + node = ExpectNodeExists(env.Client, node.Name) + Expect(node.Annotations).ToNot(HaveKey(v1alpha3.ProvisionerTTLAfterEmptyKey)) }) - It("should do nothing if the termination finalizer already exists", func() { + It("should add a TTL to empty nodes", func() { + provisioner.Spec.TTLSecondsAfterEmpty = ptr.Int64(30) node := test.Node(test.NodeOptions{ - Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: randomdata.SillyName()}, - Finalizers: []string{v1alpha3.TerminationFinalizer, "fake.com/finalizer"}, + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, }) + ExpectCreated(env.Client, provisioner) ExpectCreatedWithStatus(env.Client, node) ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - 
Expect(updatedNode.Finalizers).To(Equal(node.Finalizers)) + node = ExpectNodeExists(env.Client, node.Name) + Expect(node.Annotations).To(HaveKey(v1alpha3.ProvisionerTTLAfterEmptyKey)) }) - It("should do nothing if the not owned by a provisioner", func() { + It("should remove the TTL from non-empty nodes", func() { + provisioner.Spec.TTLSecondsAfterEmpty = ptr.Int64(30) node := test.Node(test.NodeOptions{ - Finalizers: []string{"fake.com/finalizer"}, + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, + Annotations: map[string]string{ + v1alpha3.ProvisionerTTLAfterEmptyKey: time.Now().Add(100 * time.Second).Format(time.RFC3339), + }, }) + ExpectCreated(env.Client, provisioner) ExpectCreatedWithStatus(env.Client, node) + ExpectCreatedWithStatus(env.Client, test.Pod(test.PodOptions{ + Name: strings.ToLower(randomdata.SillyName()), + Namespace: provisioner.Namespace, + NodeName: node.Name, + Conditions: []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue}}, + })) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) + + node = ExpectNodeExists(env.Client, node.Name) + Expect(node.Annotations).ToNot(HaveKey(v1alpha3.ProvisionerTTLAfterEmptyKey)) + }) + It("should terminate empty nodes past their TTL", func() { + provisioner.Spec.TTLSecondsAfterEmpty = ptr.Int64(30) + node := test.Node(test.NodeOptions{ + Finalizers: []string{v1alpha3.TerminationFinalizer}, + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, + Annotations: map[string]string{ + v1alpha3.ProvisionerTTLAfterEmptyKey: time.Now().Add(-100 * time.Second).Format(time.RFC3339), + }, + }) + ExpectCreated(env.Client, provisioner, node) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node)) + node = ExpectNodeExists(env.Client, node.Name) + Expect(node.DeletionTimestamp.IsZero()).To(BeFalse()) + }) + }) + Context("Finalizer", func() { + It("should add the termination finalizer if missing", func() { + n := test.Node(test.NodeOptions{ + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, + Finalizers: []string{"fake.com/finalizer"}, + }) + ExpectCreated(env.Client, provisioner) + ExpectCreatedWithStatus(env.Client, n) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) + + updated := ExpectNodeExists(env.Client, n.Name) + Expect(updated.Finalizers).To(ConsistOf(n.Finalizers[0], v1alpha3.TerminationFinalizer)) + }) + It("should do nothing if terminating", func() { + n := test.Node(test.NodeOptions{ + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, + Finalizers: []string{"fake.com/finalizer"}, + }) + ExpectCreated(env.Client, provisioner) + ExpectCreatedWithStatus(env.Client, n) + Expect(env.Client.Delete(ctx, n)).To(Succeed()) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) + + updated := ExpectNodeExists(env.Client, n.Name) + Expect(updated.Finalizers).To(Equal(n.Finalizers)) + }) + It("should do nothing if the termination finalizer already exists", func() { + n := test.Node(test.NodeOptions{ + Labels: map[string]string{v1alpha3.ProvisionerNameLabelKey: provisioner.Name}, + Finalizers: []string{v1alpha3.TerminationFinalizer, "fake.com/finalizer"}, + }) + ExpectCreated(env.Client, provisioner) + ExpectCreatedWithStatus(env.Client, n) + ExpectReconcileSucceeded(ctx, 
controller, client.ObjectKeyFromObject(n)) + + updated := ExpectNodeExists(env.Client, n.Name) + Expect(updated.Finalizers).To(Equal(n.Finalizers)) + }) + It("should do nothing if not owned by a provisioner", func() { + n := test.Node(test.NodeOptions{ + Finalizers: []string{"fake.com/finalizer"}, + }) + ExpectCreated(env.Client, provisioner) + ExpectCreatedWithStatus(env.Client, n) + ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n)) + + updated := ExpectNodeExists(env.Client, n.Name) + Expect(updated.Finalizers).To(Equal(n.Finalizers)) }) }) }) diff --git a/pkg/controllers/reallocation/controller.go b/pkg/controllers/reallocation/controller.go deleted file mode 100644 index ca4b0274960f..000000000000 --- a/pkg/controllers/reallocation/controller.go +++ /dev/null @@ -1,109 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package reallocation - -import ( - "context" - "fmt" - "time" - - "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" - "github.com/awslabs/karpenter/pkg/cloudprovider" - "golang.org/x/time/rate" - "knative.dev/pkg/logging" - - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/client-go/util/workqueue" - controllerruntime "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/reconcile" -) - -// Controller for the resource -type Controller struct { - Utilization *Utilization - CloudProvider cloudprovider.CloudProvider - KubeClient client.Client -} - -// NewController constructs a controller instance -func NewController(kubeClient client.Client, cloudProvider cloudprovider.CloudProvider) *Controller { - return &Controller{ - Utilization: &Utilization{KubeClient: kubeClient}, - CloudProvider: cloudProvider, - KubeClient: kubeClient, - } -} - -// Reconcile executes a reallocation control loop for the resource -func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).Named("Reallocation")) - - // 1. Retrieve provisioner from reconcile request - provisioner := &v1alpha3.Provisioner{} - if err := c.KubeClient.Get(ctx, req.NamespacedName, provisioner); err != nil { - if errors.IsNotFound(err) { - return reconcile.Result{}, nil - } - return reconcile.Result{}, err - } - - // 2. Delete any node that has been unable to join. - if err := c.Utilization.terminateFailedToJoin(ctx, provisioner); err != nil { - return reconcile.Result{}, fmt.Errorf("terminating nodes that failed to join, %w", err) - } - - // Skip reconciliation if utilization ttl is not defined. - if provisioner.Spec.TTLSecondsAfterEmpty == nil { - return reconcile.Result{}, nil - } - - // 3. Set TTL on TTLable Nodes - if err := c.Utilization.markUnderutilized(ctx, provisioner); err != nil { - return reconcile.Result{}, fmt.Errorf("adding ttl and underutilized label, %w", err) - } - - // 4. 
Remove TTL from Utilized Nodes - if err := c.Utilization.clearUnderutilized(ctx, provisioner); err != nil { - return reconcile.Result{}, fmt.Errorf("removing ttl from node, %w", err) - } - - // 5. Delete any node past its TTL - if err := c.Utilization.terminateExpired(ctx, provisioner); err != nil { - return reconcile.Result{}, fmt.Errorf("marking nodes terminable, %w", err) - } - - return reconcile.Result{RequeueAfter: 5 * time.Second}, nil -} - -func (c *Controller) Register(_ context.Context, m manager.Manager) error { - return controllerruntime. - NewControllerManagedBy(m). - Named("Reallocation"). - For(&v1alpha3.Provisioner{}). - WithOptions( - controller.Options{ - RateLimiter: workqueue.NewMaxOfRateLimiter( - workqueue.NewItemExponentialFailureRateLimiter(100*time.Millisecond, 10*time.Second), - // 10 qps, 100 bucket size - &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)}, - ), - MaxConcurrentReconciles: 1, - }, - ). - Complete(c) -} diff --git a/pkg/controllers/reallocation/suite_test.go b/pkg/controllers/reallocation/suite_test.go deleted file mode 100644 index dbbb0c2bac16..000000000000 --- a/pkg/controllers/reallocation/suite_test.go +++ /dev/null @@ -1,213 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package reallocation_test - -import ( - "context" - "strings" - "testing" - "time" - - "github.com/Pallinder/go-randomdata" - "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" - "github.com/awslabs/karpenter/pkg/cloudprovider/fake" - "github.com/awslabs/karpenter/pkg/cloudprovider/registry" - "github.com/awslabs/karpenter/pkg/controllers/reallocation" - "github.com/awslabs/karpenter/pkg/test" - - "bou.ke/monkey" - . "github.com/awslabs/karpenter/pkg/test/expectations" - . "github.com/onsi/ginkgo" - . "github.com/onsi/gomega" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - . 
"knative.dev/pkg/logging/testing" - "knative.dev/pkg/ptr" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -var ctx context.Context -var controller *reallocation.Controller -var env *test.Environment - -func TestAPIs(t *testing.T) { - ctx = TestContextWithLogger(t) - RegisterFailHandler(Fail) - RunSpecs(t, "Provisioner/Reallocator") -} - -var _ = BeforeSuite(func() { - env = test.NewEnvironment(ctx, func(e *test.Environment) { - cloudProvider := &fake.CloudProvider{} - registry.RegisterOrDie(cloudProvider) - controller = &reallocation.Controller{ - Utilization: &reallocation.Utilization{KubeClient: e.Client}, - CloudProvider: cloudProvider, - KubeClient: e.Client, - } - }) - Expect(env.Start()).To(Succeed(), "Failed to start environment") -}) - -var _ = AfterSuite(func() { - Expect(env.Stop()).To(Succeed(), "Failed to stop environment") -}) - -var _ = Describe("Reallocation", func() { - var provisioner *v1alpha3.Provisioner - - BeforeEach(func() { - provisioner = &v1alpha3.Provisioner{ - ObjectMeta: metav1.ObjectMeta{Name: v1alpha3.DefaultProvisioner.Name}, - Spec: v1alpha3.ProvisionerSpec{ - Cluster: v1alpha3.Cluster{Name: ptr.String("test-cluster"), Endpoint: "http://test-cluster", CABundle: ptr.String("dGVzdC1jbHVzdGVyCg==")}, - TTLSecondsAfterEmpty: ptr.Int64(300), - }, - } - }) - - AfterEach(func() { - ExpectCleanedUp(env.Client) - }) - - Context("Reconciliation", func() { - It("should not TTL nodes that have ready status unknown", func() { - node := test.Node(test.NodeOptions{ - ReadyStatus: v1.ConditionUnknown, - }) - - ExpectCreated(env.Client, provisioner) - ExpectCreatedWithStatus(env.Client, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(provisioner)) - - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - Expect(updatedNode.Labels).ToNot(HaveKey(v1alpha3.ProvisionerUnderutilizedLabelKey)) - Expect(updatedNode.Annotations).ToNot(HaveKey(v1alpha3.ProvisionerTTLAfterEmptyKey)) - }) - It("should not TTL nodes that have ready status false", func() { - node := test.Node(test.NodeOptions{ - ReadyStatus: v1.ConditionFalse, - }) - - ExpectCreated(env.Client, provisioner) - ExpectCreatedWithStatus(env.Client, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(provisioner)) - - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - Expect(updatedNode.Labels).ToNot(HaveKey(v1alpha3.ProvisionerUnderutilizedLabelKey)) - Expect(updatedNode.Annotations).ToNot(HaveKey(v1alpha3.ProvisionerTTLAfterEmptyKey)) - }) - It("should label nodes as underutilized and add TTL", func() { - node := test.Node(test.NodeOptions{ - Labels: map[string]string{ - v1alpha3.ProvisionerNameLabelKey: provisioner.Name, - }, - }) - ExpectCreated(env.Client, provisioner) - ExpectCreatedWithStatus(env.Client, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(provisioner)) - - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - Expect(updatedNode.Labels).To(HaveKey(v1alpha3.ProvisionerUnderutilizedLabelKey)) - Expect(updatedNode.Annotations).To(HaveKey(v1alpha3.ProvisionerTTLAfterEmptyKey)) - }) - It("should remove labels from utilized nodes", func() { - node := test.Node(test.NodeOptions{ - Labels: map[string]string{ - v1alpha3.ProvisionerNameLabelKey: provisioner.Name, - v1alpha3.ProvisionerUnderutilizedLabelKey: "true", - }, - Annotations: 
map[string]string{ - v1alpha3.ProvisionerTTLAfterEmptyKey: time.Now().Add(100 * time.Second).Format(time.RFC3339), - }, - }) - ExpectCreated(env.Client, provisioner) - ExpectCreatedWithStatus(env.Client, node) - ExpectCreatedWithStatus(env.Client, test.Pod(test.PodOptions{ - Name: strings.ToLower(randomdata.SillyName()), - Namespace: provisioner.Namespace, - NodeName: node.Name, - Conditions: []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue}}, - })) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(provisioner)) - - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - Expect(updatedNode.Labels).ToNot(HaveKey(v1alpha3.ProvisionerUnderutilizedLabelKey)) - Expect(updatedNode.Annotations).ToNot(HaveKey(v1alpha3.ProvisionerTTLAfterEmptyKey)) - }) - It("should terminate underutilized nodes past their TTL", func() { - node := test.Node(test.NodeOptions{ - Finalizers: []string{v1alpha3.TerminationFinalizer}, - Labels: map[string]string{ - v1alpha3.ProvisionerNameLabelKey: provisioner.Name, - v1alpha3.ProvisionerUnderutilizedLabelKey: "true", - }, - Annotations: map[string]string{ - v1alpha3.ProvisionerTTLAfterEmptyKey: time.Now().Add(-100 * time.Second).Format(time.RFC3339), - }, - }) - ExpectCreated(env.Client, provisioner, node) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(provisioner)) - - updatedNode := &v1.Node{} - Expect(env.Client.Get(ctx, client.ObjectKey{Name: node.Name}, updatedNode)).To(Succeed()) - Expect(updatedNode.DeletionTimestamp.IsZero()).To(BeFalse()) - }) - It("should only terminate nodes that failed to join with all pods terminating after 5 minutes", func() { - node := test.Node(test.NodeOptions{ - Finalizers: []string{v1alpha3.TerminationFinalizer}, - Labels: map[string]string{ - v1alpha3.ProvisionerNameLabelKey: provisioner.Name, - v1alpha3.ProvisionerUnderutilizedLabelKey: "true", - }, - ReadyStatus: v1.ConditionUnknown, - }) - pod := test.Pod(test.PodOptions{ - Finalizers: []string{"fake.sh/finalizer"}, - NodeName: node.Name, - }) - ExpectCreated(env.Client, provisioner, pod) - ExpectCreatedWithStatus(env.Client, node) - - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(provisioner)) - - // Expect node not deleted - updatedNode := ExpectNodeExists(env.Client, node.Name) - Expect(updatedNode.DeletionTimestamp.IsZero()).To(BeTrue()) - - // Set pod DeletionTimestamp and do another reconcile - Expect(env.Client.Delete(ctx, pod)).To(Succeed()) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(provisioner)) - - // Expect node not deleted - updatedNode = ExpectNodeExists(env.Client, node.Name) - Expect(updatedNode.DeletionTimestamp.IsZero()).To(BeTrue()) - - // Simulate time passing and a node failing to join - future := time.Now().Add(reallocation.FailedToJoinTimeout) - monkey.Patch(time.Now, func() time.Time { - return future - }) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(provisioner)) - - updatedNode = ExpectNodeExists(env.Client, node.Name) - Expect(updatedNode.DeletionTimestamp.IsZero()).To(BeFalse()) - }) - }) -}) diff --git a/pkg/controllers/reallocation/utilization.go b/pkg/controllers/reallocation/utilization.go deleted file mode 100644 index 9386757f2336..000000000000 --- a/pkg/controllers/reallocation/utilization.go +++ /dev/null @@ -1,163 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the 
License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package reallocation - -import ( - "context" - "fmt" - "time" - - "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3" - "github.com/awslabs/karpenter/pkg/utils/functional" - utilsnode "github.com/awslabs/karpenter/pkg/utils/node" - "github.com/awslabs/karpenter/pkg/utils/pod" - "github.com/awslabs/karpenter/pkg/utils/ptr" - - v1 "k8s.io/api/core/v1" - "knative.dev/pkg/logging" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -const FailedToJoinTimeout = 5 * time.Minute - -type Utilization struct { - KubeClient client.Client -} - -// markUnderutilized adds a TTL to underutilized nodes -func (u *Utilization) markUnderutilized(ctx context.Context, provisioner *v1alpha3.Provisioner) error { - ttlable := []*v1.Node{} - // 1. Get all provisioner nodes - nodes, err := u.getNodes(ctx, provisioner, map[string]string{}) - if err != nil { - return err - } - // 2. Get underutilized nodes - for _, node := range nodes { - if !utilsnode.IsReady(node) { - continue - } - pods, err := u.getPods(ctx, node) - if err != nil { - return fmt.Errorf("getting pods for node %s, %w", node.Name, err) - } - if pod.IgnoredForUnderutilization(pods) { - if _, ok := node.Annotations[v1alpha3.ProvisionerTTLAfterEmptyKey]; !ok { - ttlable = append(ttlable, node) - } - } - } - // 3. Set TTL for each underutilized node - for _, node := range ttlable { - persisted := node.DeepCopy() - node.Labels = functional.UnionStringMaps( - node.Labels, - map[string]string{v1alpha3.ProvisionerUnderutilizedLabelKey: "true"}, - ) - node.Annotations = functional.UnionStringMaps( - node.Annotations, - map[string]string{v1alpha3.ProvisionerTTLAfterEmptyKey: time.Now().Add(time.Duration(ptr.Int64Value(provisioner.Spec.TTLSecondsAfterEmpty)) * time.Second).Format(time.RFC3339)}, - ) - if err := u.KubeClient.Patch(ctx, node, client.MergeFrom(persisted)); err != nil { - return fmt.Errorf("patching node %s, %w", node.Name, err) - } - logging.FromContext(ctx).Infof("Added TTL and label to underutilized node %s", node.Name) - } - return nil -} - -// clearUnderutilized removes the TTL on underutilized nodes if there is sufficient resource usage -func (u *Utilization) clearUnderutilized(ctx context.Context, provisioner *v1alpha3.Provisioner) error { - // 1. Get underutilized nodes - nodes, err := u.getNodes(ctx, provisioner, map[string]string{v1alpha3.ProvisionerUnderutilizedLabelKey: "true"}) - if err != nil { - return fmt.Errorf("listing labeled underutilized nodes, %w", err) - } - // 2. 
Clear underutilized label if node is utilized - for _, node := range nodes { - pods, err := u.getPods(ctx, node) - if err != nil { - return fmt.Errorf("listing pods on node %s, %w", node.Name, err) - } - if !pod.IgnoredForUnderutilization(pods) { - persisted := node.DeepCopy() - delete(node.Labels, v1alpha3.ProvisionerUnderutilizedLabelKey) - delete(node.Annotations, v1alpha3.ProvisionerTTLAfterEmptyKey) - if err := u.KubeClient.Patch(ctx, node, client.MergeFrom(persisted)); err != nil { - return fmt.Errorf("removing underutilized label on %s, %w", node.Name, err) - } else { - logging.FromContext(ctx).Infof("Removed TTL from node %s", node.Name) - } - } - } - return nil -} - -// terminateExpired checks if a node is past its ttl and marks it -func (u *Utilization) terminateExpired(ctx context.Context, provisioner *v1alpha3.Provisioner) error { - // 1. Get underutilized nodes - nodes, err := u.getNodes(ctx, provisioner, map[string]string{v1alpha3.ProvisionerUnderutilizedLabelKey: "true"}) - if err != nil { - return fmt.Errorf("listing underutilized nodes, %w", err) - } - // 2. Trigger termination workflow if past TTLAfterEmpty - for _, node := range nodes { - if utilsnode.IsPastEmptyTTL(node) { - logging.FromContext(ctx).Infof("Triggering termination for empty node %s", node.Name) - if err := u.KubeClient.Delete(ctx, node); err != nil { - return fmt.Errorf("deleting node %s, %w", node.Name, err) - } - } - } - return nil -} - -func (u *Utilization) terminateFailedToJoin(ctx context.Context, provisioner *v1alpha3.Provisioner) error { - // 1. Get nodes - nodes, err := u.getNodes(ctx, provisioner, map[string]string{}) - if err != nil { - return fmt.Errorf("listing nodes, %w", err) - } - // 2. Trigger termination workflow if node has failed to become ready for 5 minutes - for _, node := range nodes { - if utilsnode.FailedToJoin(node, FailedToJoinTimeout) { - logging.FromContext(ctx).Infof("Triggering termination for node that failed to join %s", node.Name) - if err := u.KubeClient.Delete(ctx, node); err != nil { - return fmt.Errorf("deleting node %s, %w", node.Name, err) - } - } - } - return nil -} - -// getNodes returns a list of nodes with the provisioner's labels and given labels -func (u *Utilization) getNodes(ctx context.Context, provisioner *v1alpha3.Provisioner, additionalLabels map[string]string) ([]*v1.Node, error) { - nodes := &v1.NodeList{} - if err := u.KubeClient.List(ctx, nodes, client.MatchingLabels(functional.UnionStringMaps(map[string]string{ - v1alpha3.ProvisionerNameLabelKey: provisioner.Name, - }, additionalLabels))); err != nil { - return nil, fmt.Errorf("listing nodes, %w", err) - } - return ptr.NodeListToSlice(nodes), nil -} - -// getPods returns a list of pods scheduled to a node -func (u *Utilization) getPods(ctx context.Context, node *v1.Node) ([]*v1.Pod, error) { - pods := &v1.PodList{} - if err := u.KubeClient.List(ctx, pods, client.MatchingFields{"spec.nodeName": node.Name}); err != nil { - return nil, fmt.Errorf("listing pods on node %s, %w", node.Name, err) - } - return ptr.PodListToSlice(pods), nil -} diff --git a/pkg/test/nodes.go b/pkg/test/nodes.go index 308d89b3c24f..4677434dc277 100644 --- a/pkg/test/nodes.go +++ b/pkg/test/nodes.go @@ -66,7 +66,7 @@ func Node(overrides ...NodeOptions) *v1.Node { }, Spec: v1.NodeSpec{ Unschedulable: options.Unschedulable, - Taints: options.Taints, + Taints: options.Taints, }, Status: v1.NodeStatus{ Allocatable: options.Allocatable, diff --git a/pkg/utils/functional/functional.go b/pkg/utils/functional/functional.go index 
d71c711c7521..d0a483d47706 100644 --- a/pkg/utils/functional/functional.go +++ b/pkg/utils/functional/functional.go @@ -15,7 +15,10 @@ limitations under the License. package functional import ( + "encoding/json" + "reflect" "strings" + "time" "go.uber.org/multierr" ) @@ -109,3 +112,26 @@ func InvertStringMap(stringMap map[string]string) map[string]string { } return inverted } + +// MaxDuration returns the largest duration +func MaxDuration(durations ...time.Duration) time.Duration { + var max time.Duration + for _, duration := range durations { + if duration > max { + max = duration + } + } + return max +} + +func JsonEquals(a, b interface{}) bool { + aJson, err := json.Marshal(a) + if err != nil { + panic(err) + } + bJson, err := json.Marshal(b) + if err != nil { + panic(err) + } + return reflect.DeepEqual(string(aJson), string(bJson)) +} diff --git a/pkg/utils/pod/scheduling.go b/pkg/utils/pod/scheduling.go index defe7695253a..25155260def9 100644 --- a/pkg/utils/pod/scheduling.go +++ b/pkg/utils/pod/scheduling.go @@ -45,19 +45,6 @@ func IsSchedulable(pod *v1.PodSpec, node *v1.Node) bool { return true } -// IgnoredForUnderutilization returns true if the set of pods has no non-daemonset pods -func IgnoredForUnderutilization(pods []*v1.Pod) bool { - for _, p := range pods { - if HasFailed(p) { - continue - } - if !IsOwnedByDaemonSet(p) { - return false - } - } - return true -} - // ToleratesTaints returns an error if the pod does not tolerate the taints // https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/#concepts func ToleratesTaints(spec *v1.PodSpec, taints ...v1.Taint) (err error) {