Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce High Level MR metrics #683

Merged
merged 6 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/evanphx/json-patch v5.6.0+incompatible
github.com/go-logr/logr v1.4.1
github.com/google/go-cmp v0.6.0
github.com/prometheus/client_golang v1.18.0
github.com/spf13/afero v1.11.0
golang.org/x/time v0.5.0
google.golang.org/grpc v1.61.0
Expand All @@ -19,6 +20,7 @@ require (
k8s.io/apiextensions-apiserver v0.29.1
k8s.io/apimachinery v0.29.1
k8s.io/client-go v0.29.1
k8s.io/component-base v0.29.1
k8s.io/klog/v2 v2.110.1
sigs.k8s.io/controller-runtime v0.17.0
sigs.k8s.io/controller-tools v0.14.0
Expand All @@ -31,6 +33,7 @@ require (
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
github.com/Microsoft/go-winio v0.6.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/bufbuild/protocompile v0.6.0 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/containerd/log v0.1.0 // indirect
Expand Down Expand Up @@ -87,7 +90,6 @@ require (
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkg/profile v1.7.0 // indirect
github.com/prometheus/client_golang v1.18.0 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.45.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
Expand Down Expand Up @@ -123,7 +125,6 @@ require (
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/component-base v0.29.1 // indirect
k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect
k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migc
github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/bufbuild/buf v1.27.2 h1:uX2kvZfPfRoOsrxUW4LwpykSyH+wI5dUnIG0QWHDCCU=
github.com/bufbuild/buf v1.27.2/go.mod h1:7RImDhFDqhEsdK5wbuMhoVSlnrMggGGcd3s9WozvHtM=
github.com/bufbuild/protocompile v0.6.0 h1:Uu7WiSQ6Yj9DbkdnOe7U4mNKp58y9WDMKDn28/ZlunY=
Expand Down
204 changes: 204 additions & 0 deletions pkg/reconciler/managed/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
/*
Copyright 2023 The Crossplane Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package managed

import (
"context"
"sync"
"time"

"github.com/prometheus/client_golang/prometheus"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/tools/cache"
kmetrics "k8s.io/component-base/metrics"
"sigs.k8s.io/controller-runtime/pkg/cluster"
"sigs.k8s.io/controller-runtime/pkg/metrics"

xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1"
"github.com/crossplane/crossplane-runtime/pkg/errors"
"github.com/crossplane/crossplane-runtime/pkg/resource"
)

// init registers every managed resource metric declared in this file with the
// controller-runtime metrics registry.
func init() { //nolint:gochecknoinits // metrics should be registered once
	metrics.Registry.MustRegister(
		drift,
		mr,
		mrReady,
		mrSynced,
		mrDetected,
		mrReadyDuration,
		mrDeletionDuration,
	)
}
negz marked this conversation as resolved.
Show resolved Hide resolved

const (
	// subSystem is the Prometheus subsystem set on every metric declared in
	// this file.
	subSystem = "crossplane"
)

var (
drift = prometheus.NewHistogramVec(prometheus.HistogramOpts{ //nolint:gochecknoglobals // metrics should be registered once in init
Subsystem: subSystem,
Name: "resource_drift_seconds",
ezgidemirel marked this conversation as resolved.
Show resolved Hide resolved
Help: "ALPHA: How long since the previous successful reconcile when a resource was found to be out of sync; excludes restart of the provider",
Buckets: kmetrics.ExponentialBuckets(10e-9, 10, 10),
}, []string{"group", "kind"})

mr = prometheus.NewGaugeVec(prometheus.GaugeOpts{ //nolint:gochecknoglobals // metrics should be registered once in init
ezgidemirel marked this conversation as resolved.
Show resolved Hide resolved
Subsystem: subSystem,
Name: "managed_resource_created",
Help: "The number of managed resources created",
}, []string{"gvk", "name", "claim", "composite"})
negz marked this conversation as resolved.
Show resolved Hide resolved

mrReady = prometheus.NewGaugeVec(prometheus.GaugeOpts{ //nolint:gochecknoglobals // metrics should be registered once in init
Subsystem: subSystem,
Name: "managed_resource_ready",
Help: "The number of managed resources in Ready=True state",
}, []string{"gvk", "name", "claim", "composite"})
ezgidemirel marked this conversation as resolved.
Show resolved Hide resolved

mrReadyDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ //nolint:gochecknoglobals // metrics should be registered once in init
Subsystem: subSystem,
Name: "managed_resource_ready_duration_seconds",
ezgidemirel marked this conversation as resolved.
Show resolved Hide resolved
Help: "The time it took for a managed resource to become ready first time after creation",
Buckets: []float64{1, 5, 10, 15, 30, 60, 120, 300, 600, 1800, 3600},
}, []string{"gvk", "name", "claim", "composite"})

mrDetected = prometheus.NewHistogramVec(prometheus.HistogramOpts{ //nolint:gochecknoglobals // metrics should be registered once in init
Subsystem: subSystem,
Name: "managed_resource_detected_time_seconds",
ezgidemirel marked this conversation as resolved.
Show resolved Hide resolved
Help: "The time it took for a managed resource to be detected by the controller",
Buckets: kmetrics.ExponentialBuckets(10e-9, 10, 10),
}, []string{"gvk", "name", "claim", "composite"})

mrSynced = prometheus.NewGaugeVec(prometheus.GaugeOpts{ //nolint:gochecknoglobals // metrics should be registered once in init
Subsystem: subSystem,
Name: "managed_resource_synced",
Help: "The number of managed resources in Synced=True state",
}, []string{"gvk", "name", "claim", "composite"})

mrDeletionDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ //nolint:gochecknoglobals // metrics should be registered once in init
Subsystem: subSystem,
Name: "managed_resource_deletion_seconds",
Help: "The time it took for a managed resource to be deleted",
Buckets: []float64{1, 5, 10, 15, 30, 60, 120, 300, 600, 1800, 3600},
}, []string{"gvk", "name", "claim", "composite"})
)

type metricRecorder struct {
firstObservation sync.Map
lastObservation sync.Map

cluster cluster.Cluster
ezgidemirel marked this conversation as resolved.
Show resolved Hide resolved
gvk schema.GroupVersionKind
}

func (r *metricRecorder) Start(ctx context.Context) error {
inf, err := r.cluster.GetCache().GetInformerForKind(ctx, r.gvk)
if err != nil {
return errors.Wrapf(err, "cannot get informer for metric recorder for resource %s", r.gvk)
}

registered, err := inf.AddEventHandler(cache.ResourceEventHandlerFuncs{
ezgidemirel marked this conversation as resolved.
Show resolved Hide resolved
DeleteFunc: func(obj interface{}) {
if final, ok := obj.(cache.DeletedFinalStateUnknown); ok {
obj = final.Obj
}
managed, ok := obj.(resource.Managed)
if !ok {
return
}
r.firstObservation.Delete(managed.GetName())
r.lastObservation.Delete(managed.GetName())
},
})
if err != nil {
return errors.Wrap(err, "cannot add delete event handler to informer for metric recorder")
}
defer inf.RemoveEventHandler(registered) //nolint:errcheck // this happens on destruction. We cannot do anything anyway.

<-ctx.Done()

return nil
}

// recordUnchanged notes the current time as the last observation of the named
// resource.
func (r *metricRecorder) recordUnchanged(name string) {
	observedAt := time.Now()
	r.lastObservation.Store(name, observedAt)
}

// recordUpdate observes the drift metric for the named resource: the seconds
// elapsed since its last recorded observation. It then resets the last
// observation to now. Nothing is recorded when there is no previous
// observation for the name, or the stored value is not a time.Time.
func (r *metricRecorder) recordUpdate(name string) {
	prev, found := r.lastObservation.Load(name)
	since, isTime := prev.(time.Time)
	if !found || !isTime {
		return
	}

	elapsed := time.Since(since).Seconds()
	drift.WithLabelValues(r.gvk.Group, r.gvk.Kind).Observe(elapsed)

	r.lastObservation.Store(name, time.Now())
}

// recordDetected records the first reconcile of a managed resource. A resource
// whose Synced condition status is not Unknown is ignored. Otherwise the
// created gauge is set to 1, the seconds since the creation timestamp are
// observed, and the resource is remembered as awaiting first-time readiness.
func (r *metricRecorder) recordDetected(managed resource.Managed) {
	synced := managed.GetCondition(xpv1.TypeSynced)
	if synced.Status != corev1.ConditionUnknown {
		return
	}

	labels := getMRMetricLabels(managed)
	mr.With(labels).Set(1)
	mrDetected.With(labels).Observe(time.Since(managed.GetCreationTimestamp().Time).Seconds())

	// this is the first time we reconciled on this resource
	r.firstObservation.Store(managed.GetName(), time.Now())
}

// recordSyncedState sets the synced gauge of the given managed resource to v.
func (r *metricRecorder) recordSyncedState(managed resource.Managed, v float64) {
	labels := getMRMetricLabels(managed)
	mrSynced.With(labels).Set(v)
}

// recordNotReady sets the ready gauge of the given managed resource to 0.
func (r *metricRecorder) recordNotReady(managed resource.Managed) {
	labels := getMRMetricLabels(managed)
	mrReady.With(labels).Set(0)
}

// recordDeleted records the deletion of a managed resource. When the resource
// carries a deletion timestamp, the seconds elapsed since then are observed.
// The created, ready and synced gauges are always reset to 0.
func (r *metricRecorder) recordDeleted(managed resource.Managed) {
	labels := getMRMetricLabels(managed)

	if deletedAt := managed.GetDeletionTimestamp(); deletedAt != nil {
		mrDeletionDuration.With(labels).Observe(time.Since(deletedAt.Time).Seconds())
	}

	mr.With(labels).Set(0)
	mrReady.With(labels).Set(0)
	mrSynced.With(labels).Set(0)
}

// recordUpToDate records that a managed resource was found up to date. The
// synced gauge is always set to 1. If the resource also reports Ready=True,
// the ready gauge is set to 1 and, the first time this happens after the
// resource was detected, the seconds since its creation timestamp are observed
// as its time to readiness.
func (r *metricRecorder) recordUpToDate(managed resource.Managed) {
	labels := getMRMetricLabels(managed)
	mrSynced.With(labels).Set(1)

	// Note that providers may set the ready condition to "True", so we need
	// to check the value here to send the ready metric
	if managed.GetCondition(xpv1.TypeReady).Status != corev1.ConditionTrue {
		return
	}
	mrReady.With(labels).Set(1)

	// firstObservation marks resources whose first readiness has not been
	// recorded yet. LoadAndDelete claims the marker atomically, so the
	// duration is observed at most once even if the delete event handler or
	// another caller touches the same entry concurrently.
	if _, pending := r.firstObservation.LoadAndDelete(managed.GetName()); !pending {
		return
	}

	mrReadyDuration.With(labels).Observe(time.Since(managed.GetCreationTimestamp().Time).Seconds())
}

func getMRMetricLabels(managed resource.Managed) prometheus.Labels {
l := prometheus.Labels{
"gvk": managed.GetObjectKind().GroupVersionKind().String(),
"name": managed.GetName(),
"claim": "",
"composite": managed.GetLabels()["crossplane.io/composite"],
ezgidemirel marked this conversation as resolved.
Show resolved Hide resolved
}

if managed.GetLabels()["crossplane.io/claim-namespace"] != "" && managed.GetLabels()["crossplane.io/claim-name"] != "" {
l["claim"] = managed.GetLabels()["crossplane.io/claim-namespace"] + "/" + managed.GetLabels()["crossplane.io/claim-name"]
ezgidemirel marked this conversation as resolved.
Show resolved Hide resolved
}

return l
}
Loading
Loading