add allocation bind duration metric #677

Merged: 1 commit, Sep 15, 2021
1 change: 1 addition & 0 deletions go.mod
@@ -13,6 +13,7 @@ require (
github.com/onsi/ginkgo v1.16.4
github.com/onsi/gomega v1.13.0
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/prometheus/client_golang v1.11.0
go.uber.org/multierr v1.7.0
golang.org/x/time v0.0.0-20210611083556-38a9dc6acbc6
k8s.io/api v0.20.7
260 changes: 0 additions & 260 deletions go.sum

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pkg/cloudprovider/aws/suite_test.go
@@ -79,7 +79,7 @@ var _ = BeforeSuite(func() {
registry.RegisterOrDie(cloudProvider)
controller = &allocation.Controller{
Filter: &allocation.Filter{KubeClient: e.Client},
Binder: &allocation.Binder{KubeClient: e.Client, CoreV1Client: clientSet.CoreV1()},
Binder: allocation.NewBinder(e.Client, clientSet.CoreV1()),
Batcher: allocation.NewBatcher(1*time.Millisecond, 1*time.Millisecond),
Scheduler: scheduling.NewScheduler(cloudProvider, e.Client),
Packer: binpacking.NewPacker(),
77 changes: 70 additions & 7 deletions pkg/controllers/allocation/bind.go
@@ -17,8 +17,10 @@ package allocation
import (
"context"
"fmt"
"time"

"github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3"
"github.com/prometheus/client_golang/prometheus"
"go.uber.org/multierr"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
@@ -27,14 +29,23 @@ import (
"k8s.io/client-go/util/workqueue"
"knative.dev/pkg/logging"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

type Binder struct {
KubeClient client.Client
CoreV1Client corev1.CoreV1Interface
type Binder interface {
Contributor:

Curious why you promoted this to an interface? Do we expect multiple implementations?

Contributor Author:

It facilitates using the decorator pattern, with the metric logic living in a separate decorator type.

Contributor:

I worry a bit about this pattern for structs that have more methods. You could use struct embedding to achieve a similar mechanism without requiring you to re-implement every method.

type DecoratedBinder struct {
   Binder // embedded struct, all methods are promoted to DecoratedBinder
}

func (b *Binder) foo() {} 
func (b *Binder) bar() {}

func (d *DecoratedBinder) foo() { 
  // decorate before
  d.Binder.foo()
  // decorate after
}

In this case, bar is not overridden, so anyone calling decoratedBinder.bar() will get the undecorated implementation.

Bind(context.Context, *v1.Node, []*v1.Pod) error
}

func (b *Binder) Bind(ctx context.Context, node *v1.Node, pods []*v1.Pod) error {
type binder struct {
kubeClient client.Client
coreV1Client corev1.CoreV1Interface
}

func NewBinder(kubeClient client.Client, coreV1Client corev1.CoreV1Interface) Binder {
return &binder{kubeClient: kubeClient, coreV1Client: coreV1Client}
}

func (b *binder) Bind(ctx context.Context, node *v1.Node, pods []*v1.Pod) error {
// 1. Add the Karpenter finalizer to the node to enable the termination workflow
node.Finalizers = append(node.Finalizers, v1alpha3.TerminationFinalizer)
// 2. Taint karpenter.sh/not-ready=NoSchedule to prevent the kube scheduler
@@ -55,7 +66,7 @@ func (b *Binder) Bind(ctx context.Context, node *v1.Node, pods []*v1.Pod) error
// with the API server. In the common case, we create the node object
// ourselves to enforce the binding decision and enable images to be pulled
// before the node is fully Ready.
if _, err := b.CoreV1Client.Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil {
if _, err := b.coreV1Client.Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil {
if !errors.IsAlreadyExists(err) {
return fmt.Errorf("creating node %s, %w", node.Name, err)
}
@@ -71,9 +82,9 @@ func (b *Binder) Bind(ctx context.Context, node *v1.Node, pods []*v1.Pod) error
return err
}

func (b *Binder) bind(ctx context.Context, node *v1.Node, pod *v1.Pod) error {
func (b *binder) bind(ctx context.Context, node *v1.Node, pod *v1.Pod) error {
// TODO, Stop using deprecated v1.Binding
if err := b.CoreV1Client.Pods(pod.Namespace).Bind(ctx, &v1.Binding{
if err := b.coreV1Client.Pods(pod.Namespace).Bind(ctx, &v1.Binding{
TypeMeta: pod.TypeMeta,
ObjectMeta: pod.ObjectMeta,
Target: v1.ObjectReference{Name: node.Name},
@@ -82,3 +93,55 @@ func (b *Binder) bind(ctx context.Context, node *v1.Node, pod *v1.Pod) error {
}
return nil
}

type binderMetricsDecorator struct {
binder Binder
bindTimeHistogramVec *prometheus.HistogramVec
}

const metricLabelResult = "result"
Contributor:

Any reason this isn't inlined?

Contributor Author:

An old habit of avoiding bare keys in map lookups; misspellings cause runtime problems. When using a constant, misspellings cause compile-time errors.
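For illustration, a minimal standalone snippet (not from this PR) showing the difference:

package main

import "fmt"

const metricLabelResult = "result"

func main() {
    // Bare string key: a typo such as "reslut" still compiles and only misbehaves at runtime.
    labels := map[string]string{"result": "success"}

    // Constant key: a typo such as metricLabelReslut is a compile-time error.
    fmt.Println(labels[metricLabelResult])
}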


func DecorateBinderMetrics(binder Binder) Binder {
Contributor:

I wonder if (in the future) we could abstract this decoration even further and minimally wrap the code as we need it with something generic.

I'm imagining an interface like

// inside binder

var err error
metrics.recordLatency(metrics.LatencyOptions{ Name: "bind" }, func() {
  err = b.Bind()
})
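A minimal sketch of what such a helper might look like (the package, the LatencyOptions fields, and the lazy registration are assumptions for illustration, not part of this PR):

package metrics

import (
    "sync"
    "time"

    "github.com/prometheus/client_golang/prometheus"
    crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
)

type LatencyOptions struct {
    Name string // e.g. "bind"
}

var (
    mu         sync.Mutex
    histograms = map[string]prometheus.Histogram{}
)

// RecordLatency times fn and observes the duration on a histogram named by options.
// Histograms are created lazily and registered with the controller-runtime registry.
func RecordLatency(options LatencyOptions, fn func()) {
    start := time.Now()
    fn()
    duration := time.Since(start).Seconds()

    mu.Lock()
    hist, ok := histograms[options.Name]
    if !ok {
        hist = prometheus.NewHistogram(prometheus.HistogramOpts{
            Namespace: "karpenter",
            Name:      options.Name + "_duration_seconds",
        })
        crmetrics.Registry.MustRegister(hist)
        histograms[options.Name] = hist
    }
    mu.Unlock()

    hist.Observe(duration)
}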

bindTimeHistogramVec := prometheus.NewHistogramVec(
Contributor:

Thoughts on putting this whole thing in a Var that can be accessed at package scope?

e.g. https://github.com/prometheus/prometheus/blob/main/discovery/kubernetes/client_metrics.go

Contributor Author:

Is there an advantage to doing so?
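For reference, the package-scope pattern from the linked prometheus code would look roughly like this here (a sketch only; the variable name is illustrative and this is not what the PR does):

package allocation

import (
    "github.com/prometheus/client_golang/prometheus"
    "sigs.k8s.io/controller-runtime/pkg/metrics"
)

var bindDurationSeconds = prometheus.NewHistogramVec(
    prometheus.HistogramOpts{
        Namespace: "karpenter",
        Subsystem: "allocation_controller",
        Name:      "bind_duration_seconds",
        Help:      "Duration of bind process in seconds. Broken down by result.",
    },
    []string{"result"},
)

// Registration happens once at package initialization; a bad metric definition fails fast at startup.
func init() {
    metrics.Registry.MustRegister(bindDurationSeconds)
}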

prometheus.HistogramOpts{
Namespace: "karpenter",
Contributor:

Thoughts on building a /pkg/metrics component with helpers to encapsulate the buckets, Name, Subsystem, etc?

package metrics

type HistogramOptions struct {
   Namespace string // defaults to karpenter
   Buckets []float64
   ...
}

func NewHistogram(options HistogramOptions) {
   // default options if some not set (e.g. buckets)
}
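Fleshing that suggestion out, a defaulting helper might look roughly like this (a sketch; the field names and defaults are assumptions rather than an agreed API):

package metrics

import "github.com/prometheus/client_golang/prometheus"

type HistogramOptions struct {
    Namespace string    // defaults to "karpenter" if empty
    Subsystem string
    Name      string
    Help      string
    Buckets   []float64 // defaults to prometheus.DefBuckets if nil
}

// NewHistogramVec applies the defaults and builds the underlying prometheus collector.
func NewHistogramVec(options HistogramOptions, labelNames []string) *prometheus.HistogramVec {
    if options.Namespace == "" {
        options.Namespace = "karpenter"
    }
    if options.Buckets == nil {
        options.Buckets = prometheus.DefBuckets
    }
    return prometheus.NewHistogramVec(prometheus.HistogramOpts{
        Namespace: options.Namespace,
        Subsystem: options.Subsystem,
        Name:      options.Name,
        Help:      options.Help,
        Buckets:   options.Buckets,
    }, labelNames)
}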

Contributor Author:

Perhaps after more metrics have been implemented and patterns have been identified. Defining constants for Namespace and Subsystem seems likely, but not necessary in this commit as each appears only once in the code.

Subsystem: "allocation_controller",
Name: "bind_duration_seconds",
Help: "Duration of bind process in seconds. Broken down by result.",
// Use same bucket thresholds as controller-runtime.
// https://github.com/kubernetes-sigs/controller-runtime/blob/v0.10.0/pkg/internal/controller/metrics/metrics.go#L47-L48
Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
1.25, 1.5, 1.75, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 40, 50, 60},
},
[]string{metricLabelResult},
)
metrics.Registry.MustRegister(bindTimeHistogramVec)

return &binderMetricsDecorator{binder: binder, bindTimeHistogramVec: bindTimeHistogramVec}
}

func (b *binderMetricsDecorator) Bind(ctx context.Context, node *v1.Node, pods []*v1.Pod) error {
startTime := time.Now()
bindErr := b.binder.Bind(ctx, node, pods)
durationSeconds := time.Since(startTime).Seconds()

result := "success"
if bindErr != nil {
result = "error"
}

observer, promErr := b.bindTimeHistogramVec.GetMetricWith(prometheus.Labels{metricLabelResult: result})
Contributor:

Under what scenarios can we get an error? My understanding is that this is all static memory allocation.

Contributor Author:

Underlying implementations may change over time. Since the API may return an error it seems advisable to treat it as though it could fail, even if the current implementation makes that very unlikely.

Another question is whether a failure in the metrics reporting is considered a recoverable error or whether karpenter should panic? This implementation assumes such a failure is not a cause to panic.

Contributor:

I would agree that a failure in metric emission is not a cause to panic.

Contributor (@ellistarn), Sep 15, 2021:

I think it's fine to log an error and continue.

if promErr != nil {
logging.FromContext(ctx).Warnf(
"Failed to record bind duration metric [%s=%s, duration=%f]: error=%w",
metricLabelResult,
result,
Contributor:

I'm a bit wary of the code complexity of tracking everything this error is going to log, especially since we will already bubble up errors in the normal karpenter controller logic and print something along the lines of "failed to X".

Contributor Author:

Should the karpenter controller logic treat a failure to record the metric as a failure of the bind operation?

Contributor:

I don't think metric emission errors should bubble up to the controller interface on bind. I think the warn provides enough visibility if this were to fail, but it's not necessarily critical to the actual bind operation (and it's unlikely to occur).

Contributor:

If it was errorable, I agree, but I'm unsure whether or not recording a metric should be able to fail.

Taking a quick peek at the implementation, it looks like the only way it can fail is if the labels can't be hashed. Of course, this implementation could change, but from how I've seen prometheus metrics used across k8s projects, the intent is for these metrics to be static.
For example, we could do something like the following. If it does change in an unexpected way, we'll fail fast at process startup.

var (
  bindLatencyHistogramVec = prometheus.NewHistogramVec( ... )
  successfulBindLatencyHistogram = runtimeutil.Must(bindLatencyHistogramVec.GetMetricWith( ... ))
  failedBindLatencyHistogram = runtimeutil.Must(bindLatencyHistogramVec.GetMetricWith( ... ))
)

func bind(... ) {
   ...
   successfulBindLatencyHistogram.Observe(...)
}

Contributor:

Thinking further on this, I wonder if we should be including Provisioner labels (name/namespace) into these metrics, which would make my above points moot (since the labels are dynamic).
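Purely to illustrate that idea, a provisioner-aware observation might look like this (label names and parameters are hypothetical; not part of this PR, and the HistogramVec would need to be created with these three label names):

// observeBindDuration records a bind duration with provisioner-aware labels.
func observeBindDuration(vec *prometheus.HistogramVec, provisionerName, provisionerNamespace, result string, seconds float64) error {
    observer, err := vec.GetMetricWith(prometheus.Labels{
        "result":                result,
        "provisioner_name":      provisionerName,
        "provisioner_namespace": provisionerNamespace,
    })
    if err != nil {
        return err
    }
    observer.Observe(seconds)
    return nil
}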

durationSeconds,
promErr,
)
} else {
observer.Observe(durationSeconds)
}

return bindErr
}
4 changes: 2 additions & 2 deletions pkg/controllers/allocation/controller.go
@@ -55,7 +55,7 @@ const (
type Controller struct {
Batcher *Batcher
Filter *Filter
Binder *Binder
Binder Binder
Scheduler *scheduling.Scheduler
Packer binpacking.Packer
CloudProvider cloudprovider.CloudProvider
@@ -66,7 +66,7 @@ func NewController(kubeClient client.Client, coreV1Client corev1.CoreV1Interface, cloudProvider cloudprovider.CloudProvider) *Controller {
func NewController(kubeClient client.Client, coreV1Client corev1.CoreV1Interface, cloudProvider cloudprovider.CloudProvider) *Controller {
return &Controller{
Filter: &Filter{KubeClient: kubeClient},
Binder: &Binder{KubeClient: kubeClient, CoreV1Client: coreV1Client},
Binder: DecorateBinderMetrics(NewBinder(kubeClient, coreV1Client)),
Batcher: NewBatcher(maxBatchWindow, batchIdleTimeout),
Scheduler: scheduling.NewScheduler(cloudProvider, kubeClient),
Packer: binpacking.NewPacker(),
2 changes: 1 addition & 1 deletion pkg/controllers/allocation/scheduling/suite_test.go
@@ -55,7 +55,7 @@ var _ = BeforeSuite(func() {
registry.RegisterOrDie(cloudProvider)
controller = &allocation.Controller{
Filter: &allocation.Filter{KubeClient: e.Client},
Binder: &allocation.Binder{KubeClient: e.Client, CoreV1Client: corev1.NewForConfigOrDie(e.Config)},
Binder: allocation.NewBinder(e.Client, corev1.NewForConfigOrDie(e.Config)),
Batcher: allocation.NewBatcher(1*time.Millisecond, 1*time.Millisecond),
Scheduler: scheduling.NewScheduler(cloudProvider, e.Client),
Packer: binpacking.NewPacker(),
2 changes: 1 addition & 1 deletion pkg/controllers/allocation/suite_test.go
@@ -58,7 +58,7 @@ var _ = BeforeSuite(func() {
registry.RegisterOrDie(cloudProvider)
controller = &allocation.Controller{
Filter: &allocation.Filter{KubeClient: e.Client},
Binder: &allocation.Binder{KubeClient: e.Client, CoreV1Client: corev1.NewForConfigOrDie(e.Config)},
Binder: allocation.NewBinder(e.Client, corev1.NewForConfigOrDie(e.Config)),
Batcher: allocation.NewBatcher(1*time.Millisecond, 1*time.Millisecond),
Scheduler: scheduling.NewScheduler(cloudProvider, e.Client),
Packer: binpacking.NewPacker(),