Skip to content

Commit

Permalink
add a namespace selector to the provisioner spec
Browse files Browse the repository at this point in the history
This is modeled after the NamespaceSelector on pod affinity terms
and works the same way.

Fixes aws#1493
  • Loading branch information
tzneal committed Mar 10, 2022
1 parent 1e75c7c commit c433c76
Show file tree
Hide file tree
Showing 9 changed files with 227 additions and 3 deletions.
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ apply: ## Deploy the controller into your ~/.kube/config cluster
$(HELM_OPTS) \
--set controller.image=$(shell $(WITH_GOFLAGS) ko build -B github.com/aws/karpenter/cmd/controller) \
--set webhook.image=$(shell $(WITH_GOFLAGS) ko build -B github.com/aws/karpenter/cmd/webhook)
@# apply any modified/added CRDs to the server
@git status charts/karpenter/crds -s | grep "^ [MA]" | sed 's/^ [MA] //' | xargs -n 1 kubectl apply -f

delete: ## Delete the controller from your ~/.kube/config cluster
helm uninstall karpenter --namespace karpenter
Expand Down
47 changes: 47 additions & 0 deletions charts/karpenter/crds/karpenter.sh_provisioners.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,53 @@ spec:
that Karpenter supports for limiting.
type: object
type: object
namespaceSelector:
description: A label query over the set of namespaces that the provisioner
applies to. The provisioner is applied to the pods in the namespaces
selected by this field. An omitted selector or an empty selector
({}) matches all namespaces.
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that
contains values, a key, and an operator that relates the key
and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
type: string
values:
description: values is an array of string values. If the
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
provider:
description: Provider contains fields specific to your cloudprovider.
type: object
Expand Down
3 changes: 3 additions & 0 deletions charts/karpenter/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["create"]
- apiGroups: [ "" ]
resources: [ "namespaces" ]
verbs: [ "get", "list", "watch" ]
- apiGroups: [""]
resources: ["pods/binding", "pods/eviction"]
verbs: ["create"]
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/provisioning/v1alpha5/provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ import (
type ProvisionerSpec struct {
// Constraints are applied to all nodes launched by this provisioner.
Constraints `json:",inline"`
// A label query over the set of namespaces that the provisioner applies to.
// The provisioner is applied to the pods in the namespaces selected by this field.
// An omitted selector or an empty selector ({}) matches all namespaces.
// +optional
NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector,omitempty" protobuf:"bytes,4,opt,name=namespaceSelector"`
// TTLSecondsAfterEmpty is the number of seconds the controller will wait
// before attempting to delete a node, measured from when the node is
// detected to be empty. A Node is considered to be empty when it does not
Expand Down
6 changes: 6 additions & 0 deletions pkg/apis/provisioning/v1alpha5/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/controllers/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ func NewManagerOrDie(ctx context.Context, config *rest.Config, options controlle
}); err != nil {
panic(fmt.Sprintf("Failed to setup pod indexer, %s", err))
}
// metadata.name normally works as a field selector against the API server, however the client is cached and this is
// required to enable matching against the cached fields
if err := newManager.GetFieldIndexer().IndexField(ctx, &v1.Namespace{}, "metadata.name", func(o client.Object) []string {
return []string{o.(*v1.Namespace).Name}
}); err != nil {
panic(fmt.Sprintf("Failed to setup namespace indexer, %s", err))
}
return &GenericControllerManager{Manager: newManager}
}

Expand Down
58 changes: 55 additions & 3 deletions pkg/controllers/selection/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ import (
"fmt"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"

"github.com/go-logr/zapr"
"go.uber.org/multierr"
"go.uber.org/zap"
Expand Down Expand Up @@ -75,6 +79,7 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
logging.FromContext(ctx).Errorf("Ignoring pod, %s", err)
return reconcile.Result{}, nil
}

// Select a provisioner, wait for it to bind the pod, and verify scheduling succeeded in the next loop
if err := c.selectProvisioner(ctx, pod); err != nil {
logging.FromContext(ctx).Debugf("Could not schedule pod, %s", err)
Expand All @@ -96,13 +101,30 @@ func (c *Controller) selectProvisioner(ctx context.Context, pod *v1.Pod) (errs e
if len(provisioners) == 0 {
return nil
}

// lookup the pod namespace for matching against the provisioner
podNamespace, err := c.getPodNamespace(ctx, pod)
if err != nil {
return err
}

for _, candidate := range c.provisioners.List(ctx) {
// check if the provisioner is allowed to provision pods in this namespace
if err := provisionerCanProvision(candidate, podNamespace); err != nil {
errs = multierr.Append(errs, fmt.Errorf("tried provisioner/%s: %w", candidate.Name, err))
continue
}

// ValidatePod is on Constraints, which is embedded in ProvisionerSpec. If that gets reworked, consider moving
// provisionerCanProvision to there as well
if err := candidate.Spec.DeepCopy().ValidatePod(pod); err != nil {
errs = multierr.Append(errs, fmt.Errorf("tried provisioner/%s: %w", candidate.Name, err))
} else {
provisioner = candidate
break
continue
}

// found a matching provisioner
provisioner = candidate
break
}
if provisioner == nil {
return fmt.Errorf("matched 0/%d provisioners, %w", len(multierr.Errors(errs)), errs)
Expand All @@ -114,6 +136,36 @@ func (c *Controller) selectProvisioner(ctx context.Context, pod *v1.Pod) (errs e
return nil
}

// provisionerCanProvision checks whether the candidate provisioner may provision pods that live in podNamespace.
// It returns nil when the provisioner has no namespace selector or when the selector matches the namespace's
// labels. It returns an error when the selector cannot be converted or does not match.
func provisionerCanProvision(candidate *provisioning.Provisioner, podNamespace v1.Namespace) error {
	nsSelector := candidate.Spec.NamespaceSelector
	if nsSelector == nil {
		// a provisioner without a namespace selector accepts pods from every namespace
		return nil
	}
	// convert the API label selector into a matcher that can be evaluated against the namespace labels
	matcher, err := metav1.LabelSelectorAsSelector(nsSelector)
	if err != nil {
		return err
	}
	if matcher.Matches(labels.Set(podNamespace.Labels)) {
		return nil
	}
	return fmt.Errorf("pod doesn't match namespace label selector")
}

// getPodNamespace returns the Namespace object that the pod belongs to, so that its labels can be matched against
// a provisioner's namespace selector.
//
// The lookup is a List filtered by a "metadata.name" field selector equal to pod.Namespace.
// NOTE(review): this presumably relies on the "metadata.name" field index for Namespaces being registered on the
// manager's cache (see NewManagerOrDie); confirm c.kubeClient is the cached client.
//
// An error is returned if the list call fails or if no namespace with that name is returned.
func (c *Controller) getPodNamespace(ctx context.Context, pod *v1.Pod) (v1.Namespace, error) {
	var namespaces v1.NamespaceList
	byName := fields.OneTermEqualSelector("metadata.name", pod.Namespace)
	if err := c.kubeClient.List(ctx, &namespaces, &client.ListOptions{FieldSelector: byName}); err != nil {
		// separate the context from the wrapped cause, matching the ", %w" style used by the other errors
		return v1.Namespace{}, fmt.Errorf("unable to list namespaces, %w", err)
	}
	if len(namespaces.Items) == 0 {
		// shouldn't happen
		return v1.Namespace{}, fmt.Errorf("namespace %s not found", pod.Namespace)
	}
	return namespaces.Items[0], nil
}

func isProvisionable(p *v1.Pod) bool {
return !pod.IsScheduled(p) &&
!pod.IsPreempting(p) &&
Expand Down
93 changes: 93 additions & 0 deletions pkg/controllers/selection/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,99 @@ var _ = AfterEach(func() {
ExpectProvisioningCleanedUp(ctx, env.Client, provisioners)
})

// Namespace Selector specs verify that a provisioner only schedules pods whose namespace labels satisfy
// provisioner.Spec.NamespaceSelector, and that a nil or empty selector places no restriction on the namespace.
var _ = Describe("Namespace Selector", func() {
	// fooBarLabels builds a selector for namespaces labelled foo=bar using MatchLabels.
	fooBarLabels := func() *metav1.LabelSelector {
		return &metav1.LabelSelector{
			MatchLabels: map[string]string{
				"foo": "bar",
			},
		}
	}
	// fooBarExpressions builds a selector for namespaces whose foo label is in ["bar"] using MatchExpressions.
	fooBarExpressions := func() *metav1.LabelSelector {
		return &metav1.LabelSelector{
			MatchExpressions: []metav1.LabelSelectorRequirement{
				{
					Key:      "foo",
					Operator: metav1.LabelSelectorOpIn,
					Values:   []string{"bar"},
				},
			},
		}
	}
	// createFooBarNamespace creates a randomly named namespace carrying the label foo=bar and returns its name.
	createFooBarNamespace := func() string {
		name := randomdata.Noun() + randomdata.Adjective() // need a lowercase name here
		ExpectCreated(ctx, env.Client, &v1.Namespace{
			ObjectMeta: metav1.ObjectMeta{
				Name:   name,
				Labels: map[string]string{"foo": "bar"},
			},
		})
		return name
	}

	It("should schedule if there is no namespace selector", func() {
		provisioner.Spec.NamespaceSelector = nil
		// NOTE(review): the first call passes no pods; presumably it only applies the provisioner - confirm
		ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner)
		unrestricted := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner,
			test.UnschedulablePod(),
		)[0]
		ExpectScheduled(ctx, env.Client, unrestricted)
	})
	It("should schedule if there is an empty namespace selector", func() {
		// an explicitly empty selector, as opposed to a nil one
		provisioner.Spec.NamespaceSelector = &metav1.LabelSelector{
			MatchLabels:      map[string]string{},
			MatchExpressions: []metav1.LabelSelectorRequirement{},
		}

		ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner)
		unrestricted := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner,
			test.UnschedulablePod(),
		)[0]
		ExpectScheduled(ctx, env.Client, unrestricted)
	})
	It("should not schedule if the pod isn't in a matching namespace, MatchLabels", func() {
		provisioner.Spec.NamespaceSelector = fooBarLabels()
		ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner)
		// the pod uses the default test namespace rather than one created with the foo=bar label
		rejected := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner,
			test.UnschedulablePod(),
		)[0]
		ExpectNotScheduled(ctx, env.Client, rejected)
	})
	It("should not schedule if the pod isn't in a matching namespace, MatchExpressions", func() {
		provisioner.Spec.NamespaceSelector = fooBarExpressions()
		ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner)
		rejected := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner,
			test.UnschedulablePod(),
		)[0]
		ExpectNotScheduled(ctx, env.Client, rejected)
	})
	It("should schedule if the pod is in a matching namespace, MatchLabels", func() {
		ns := createFooBarNamespace()

		// select for namespaces with the label foo=bar
		provisioner.Spec.NamespaceSelector = fooBarLabels()
		accepted := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner,
			test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Namespace: ns}}),
		)[0]
		ExpectScheduled(ctx, env.Client, accepted)
	})
	It("should schedule if the pod is in a matching namespace, MatchExpressions", func() {
		ns := createFooBarNamespace()

		// select for namespaces with the label foo in ["bar"]
		provisioner.Spec.NamespaceSelector = fooBarExpressions()
		accepted := ExpectProvisioned(ctx, env.Client, selectionController, provisioners, provisioner,
			test.UnschedulablePod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Namespace: ns}}),
		)[0]
		ExpectScheduled(ctx, env.Client, accepted)
	})
})

var _ = Describe("Volume Topology Requirements", func() {
var storageClass *storagev1.StorageClass
BeforeEach(func() {
Expand Down
9 changes: 9 additions & 0 deletions website/content/en/preview/provisioner.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ spec:
# If omitted, the feature is disabled, nodes will never scale down due to low utilization
ttlSecondsAfterEmpty: 30

# If omitted, Karpenter will create nodes for pods in any namespace
namespaceSelector:
matchLabels:
karpenter: "yes"

# Provisioned nodes will have these taints
# Taints may prevent pods from scheduling if they are not tolerated
taints:
Expand Down Expand Up @@ -69,6 +74,10 @@ spec:
If neither of these values are set, Karpenter will *not* delete instances. It is recommended to set the `ttlSecondsAfterEmpty` value, to enable scale down of the cluster.

### spec.namespaceSelector

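Setting a value here causes Karpenter to respond only to unschedulable pods in namespaces matched by the namespaceSelector. The selector is evaluated against the labels on each namespace, not the labels on the pods. If the selector is omitted or empty (`{}`), pods in all namespaces are considered.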

### spec.ttlSecondsAfterEmpty

Setting a value here enables Karpenter to delete empty/unnecessary instances. DaemonSets are excluded from considering a node "empty". This value is in seconds.
Expand Down

0 comments on commit c433c76

Please sign in to comment.