Skip to content

Commit

Permalink
PR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis committed Sep 17, 2024
1 parent b0c3dcf commit 4e6c7ae
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 6 deletions.
2 changes: 2 additions & 0 deletions kwok/charts/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ spec:
from a combination of nodepool and pod scheduling constraints.
properties:
disruption:
default:
consolidateAfter: 0s
description: Disruption contains the parameters that relate to Karpenter's disruption logic
properties:
budgets:
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ spec:
from a combination of nodepool and pod scheduling constraints.
properties:
disruption:
default:
consolidateAfter: 0s
description: Disruption contains the parameters that relate to Karpenter's disruption logic
properties:
budgets:
Expand Down
10 changes: 6 additions & 4 deletions pkg/controllers/disruption/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

"sigs.k8s.io/karpenter/pkg/utils/pretty"

v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
"sigs.k8s.io/karpenter/pkg/cloudprovider"
"sigs.k8s.io/karpenter/pkg/controllers/disruption/orchestration"
Expand Down Expand Up @@ -126,16 +128,16 @@ func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) {
return reconcile.Result{RequeueAfter: time.Second}, nil
}

// Karpenter taints nodes with a karpenter.sh/disruption taint as part of the disruption process
// while it progresses in memory. If Karpenter restarts during a disruption action, some nodes can be left tainted.
// Karpenter taints nodes with a karpenter.sh/disruption taint as part of the disruption process while it progresses in memory.
// If Karpenter restarts or fails with an error during a disruption action, some nodes can be left tainted.
// Idempotently remove this taint from candidates that are not in the orchestration queue before continuing.
if err := state.RequireNoScheduleTaint(ctx, c.kubeClient, false, lo.Filter(c.cluster.Nodes(), func(s *state.StateNode, _ int) bool {
return !c.queue.HasAny(s.ProviderID())
})...); err != nil {
if errors.IsConflict(err) {
return reconcile.Result{Requeue: true}, nil
}
return reconcile.Result{}, fmt.Errorf("removing taint from nodes, %w", err)
return reconcile.Result{}, fmt.Errorf("removing taint %s from nodes, %w", pretty.Taint(v1.DisruptedNoScheduleTaint), err)
}

// Attempt different disruption methods. We'll only let one method perform an action
Expand Down Expand Up @@ -207,7 +209,7 @@ func (c *Controller) executeCommand(ctx context.Context, m Method, cmd Command,
})
// Cordon the old nodes before we launch the replacements to prevent new pods from scheduling to the old nodes
if err := state.RequireNoScheduleTaint(ctx, c.kubeClient, true, stateNodes...); err != nil {
return fmt.Errorf("tainting nodes (command-id: %s), %w", commandID, err)
return fmt.Errorf("tainting nodes with %s (command-id: %s), %w", pretty.Taint(v1.DisruptedNoScheduleTaint), commandID, err)
}

var nodeClaimNames []string
Expand Down
5 changes: 4 additions & 1 deletion pkg/controllers/node/termination/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

"sigs.k8s.io/karpenter/pkg/utils/pretty"

v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
"sigs.k8s.io/karpenter/pkg/cloudprovider"
"sigs.k8s.io/karpenter/pkg/controllers/node/termination/terminator"
Expand Down Expand Up @@ -104,7 +106,7 @@ func (c *Controller) finalize(ctx context.Context, node *corev1.Node) (reconcile
if errors.IsConflict(err) {
return reconcile.Result{Requeue: true}, nil
}
return reconcile.Result{}, client.IgnoreNotFound(fmt.Errorf("tainting node with %s, %w", v1.DisruptedTaintKey, err))
return reconcile.Result{}, client.IgnoreNotFound(fmt.Errorf("tainting node with %s, %w", pretty.Taint(v1.DisruptedNoScheduleTaint), err))
}
if err = c.terminator.Drain(ctx, node, nodeTerminationTime); err != nil {
if !terminator.IsNodeDrainError(err) {
Expand Down Expand Up @@ -237,6 +239,7 @@ func (c *Controller) removeFinalizer(ctx context.Context, n *corev1.Node) error
// We use client.StrategicMergeFrom here since the node object supports it and
// a strategic merge patch represents the finalizer list as a keyed "set" so removing
// an item from the list doesn't replace the full list
// https://github.com/kubernetes/kubernetes/issues/111643#issuecomment-2016489732
if err := c.kubeClient.Patch(ctx, n, client.StrategicMergeFrom(stored)); err != nil {
return client.IgnoreNotFound(fmt.Errorf("removing finalizer, %w", err))
}
Expand Down
1 change: 1 addition & 0 deletions pkg/controllers/nodeclaim/termination/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ func (c *Controller) finalize(ctx context.Context, nodeClaim *v1.NodeClaim) (rec
// We use client.MergeFromWithOptimisticLock because patching a list with a JSON merge patch
// can cause races due to the fact that it fully replaces the list on a change
// Here, we are updating the finalizer list
// https://github.com/kubernetes/kubernetes/issues/111643#issuecomment-2016489732
if err = c.kubeClient.Patch(ctx, nodeClaim, client.MergeFromWithOptions(stored, client.MergeFromWithOptimisticLock{})); err != nil {
if errors.IsConflict(err) {
return reconcile.Result{Requeue: true}, nil
Expand Down
4 changes: 3 additions & 1 deletion pkg/scheduling/taints.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
corev1 "k8s.io/api/core/v1"
cloudproviderapi "k8s.io/cloud-provider/api"

"sigs.k8s.io/karpenter/pkg/utils/pretty"

v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
)

Expand All @@ -49,7 +51,7 @@ func (ts Taints) Tolerates(pod *corev1.Pod) (errs error) {
tolerates = tolerates || t.ToleratesTaint(&taint)
}
if !tolerates {
errs = multierr.Append(errs, fmt.Errorf("did not tolerate %s=%s:%s", taint.Key, taint.Value, taint.Effect))
errs = multierr.Append(errs, fmt.Errorf("did not tolerate %s", pretty.Taint(taint)))
}
}
return errs
Expand Down
5 changes: 5 additions & 0 deletions pkg/utils/pretty/pretty.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

"golang.org/x/exp/constraints"
"golang.org/x/exp/slices"
v1 "k8s.io/api/core/v1"
)

func Concise(o interface{}) string {
Expand Down Expand Up @@ -77,3 +78,7 @@ func Map[K constraints.Ordered, V any](values map[K]V, maxItems int) string {
}
return buf.String()
}

func Taint(t v1.Taint) string {
return fmt.Sprintf("%s=%s:%s", t.Key, t.Value, t.Effect)
}

0 comments on commit 4e6c7ae

Please sign in to comment.