From a47aac93c026429d124de63d9d714223ded3cc58 Mon Sep 17 00:00:00 2001 From: Sunny Date: Fri, 11 Aug 2023 14:18:21 +0000 Subject: [PATCH] Delete stale metrics on object delete Use the metrics helper to record all the metrics. Metrics helpers ensures that the metrics for deleted objects are deleted as well. Move all the metrics recording to be performed at the very end of the reconciliation. Realtime metrics for readiness is no longer recorded as it will be removed in a future version for CRD metrics collected using kube-state-metrics. Updating the object status with realtime readiness should provide the readiness to CRD metrics watchers. `HelmReleaseReconciler.reconcileDelete()` is modified to receive a pointer HelmRelease object so that any modifications on the object is reflected on the object instance that's passed to the metrics recorder. This is not needed for `HelmReleaseReconciler.reconcile()` as it returns a new copy of the object that's saved in the same object variable, overwriting the object instance with the updates. Signed-off-by: Sunny --- go.mod | 2 +- go.sum | 4 +- internal/controller/helmrelease_controller.go | 86 ++++--------------- main.go | 7 +- 4 files changed, 21 insertions(+), 78 deletions(-) diff --git a/go.mod b/go.mod index c1d48ef73..0dcedff1a 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/fluxcd/pkg/apis/event v0.5.2 github.com/fluxcd/pkg/apis/kustomize v1.1.1 github.com/fluxcd/pkg/apis/meta v1.1.2 - github.com/fluxcd/pkg/runtime v0.41.0 + github.com/fluxcd/pkg/runtime v0.42.0 github.com/fluxcd/pkg/ssa v0.30.0 github.com/fluxcd/source-controller/api v1.0.1 github.com/go-logr/logr v1.2.4 diff --git a/go.sum b/go.sum index b92b5a79a..ffb30de50 100644 --- a/go.sum +++ b/go.sum @@ -173,8 +173,8 @@ github.com/fluxcd/pkg/apis/kustomize v1.1.1 h1:MSGn4z0R9PptmoPFHnx2nEZ8Jtl1sKfw0 github.com/fluxcd/pkg/apis/kustomize v1.1.1/go.mod h1:0pCu0ecIY+ZM0iE/hOHYwCMZ3b0SpBrjJ1SH3FFyYdE= github.com/fluxcd/pkg/apis/meta v1.1.2 h1:Unjo7hxadtB2dvGpeFqZZUdsjpRA08YYSBb7dF2WIAM= github.com/fluxcd/pkg/apis/meta v1.1.2/go.mod h1:BHQyRHCskGMEDf6kDGbgQ+cyiNpUHbLsCOsaMYM2maI= -github.com/fluxcd/pkg/runtime v0.41.0 h1:hjWUwVRCKDuGEUhovWrygt/6PRry4p278yKuJNgTfv8= -github.com/fluxcd/pkg/runtime v0.41.0/go.mod h1:1GN+nxoQ7LmSsLJwjH8JW8pA27tBSO+KLH43HpywCDM= +github.com/fluxcd/pkg/runtime v0.42.0 h1:a5DQ/f90YjoHBmiXZUpnp4bDSLORjInbmqP7K11L4uY= +github.com/fluxcd/pkg/runtime v0.42.0/go.mod h1:p6A3xWVV8cKLLQW0N90GehKgGMMmbNYv+OSJ/0qB0vg= github.com/fluxcd/pkg/ssa v0.30.0 h1:SYf8EBXevbTNwdHoKqRuU00YdnmqqUuR5xcciRrIi0E= github.com/fluxcd/pkg/ssa v0.30.0/go.mod h1:eUcni/amc2fM9rJ3ZR3oPeAW/ZYk3mOGO1TW9RFmAuI= github.com/fluxcd/source-controller/api v1.0.1 h1:nycylbNBnQd+EO4UHpqXqAQJ1cGAPxgBbrfERCQ1pp8= diff --git a/internal/controller/helmrelease_controller.go b/internal/controller/helmrelease_controller.go index fbc0ab12a..afd58a6a2 100644 --- a/internal/controller/helmrelease_controller.go +++ b/internal/controller/helmrelease_controller.go @@ -37,7 +37,6 @@ import ( "k8s.io/cli-runtime/pkg/genericclioptions" "k8s.io/client-go/rest" kuberecorder "k8s.io/client-go/tools/record" - "k8s.io/client-go/tools/reference" "sigs.k8s.io/cli-utils/pkg/kstatus/polling" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -54,8 +53,8 @@ import ( "github.com/fluxcd/pkg/apis/meta" "github.com/fluxcd/pkg/runtime/acl" runtimeClient "github.com/fluxcd/pkg/runtime/client" + helper "github.com/fluxcd/pkg/runtime/controller" "github.com/fluxcd/pkg/runtime/jitter" - "github.com/fluxcd/pkg/runtime/metrics" "github.com/fluxcd/pkg/runtime/predicates" "github.com/fluxcd/pkg/runtime/transform" sourcev1 "github.com/fluxcd/source-controller/api/v1beta2" @@ -78,11 +77,11 @@ import ( // HelmReleaseReconciler reconciles a HelmRelease object type HelmReleaseReconciler struct { client.Client + helper.Metrics Config *rest.Config Scheme *runtime.Scheme EventRecorder kuberecorder.EventRecorder - MetricsRecorder *metrics.Recorder DefaultServiceAccount string NoCrossNamespaceRef bool ClientOpts runtimeClient.Options @@ -153,8 +152,12 @@ func (r *HelmReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, client.IgnoreNotFound(err) } - // record suspension metrics - defer r.recordSuspension(ctx, hr) + defer func() { + // Always record metrics. + r.Metrics.RecordSuspend(ctx, &hr, hr.Spec.Suspend) + r.Metrics.RecordReadiness(ctx, &hr) + r.Metrics.RecordDuration(ctx, &hr, start) + }() // Add our finalizer if it does not exist if !controllerutil.ContainsFinalizer(&hr, v2.HelmReleaseFinalizer) { @@ -168,7 +171,7 @@ func (r *HelmReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Examine if the object is under deletion if !hr.ObjectMeta.DeletionTimestamp.IsZero() { - return r.reconcileDelete(ctx, hr) + return r.reconcileDelete(ctx, &hr) } // Return early if the HelmRelease is suspended. @@ -185,9 +188,6 @@ func (r *HelmReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{Requeue: true}, updateStatusErr } - // Record ready status - r.recordReadiness(ctx, hr) - // Log reconciliation duration durationMsg := fmt.Sprintf("reconciliation finished in %s", time.Now().Sub(start).String()) if result.RequeueAfter > 0 { @@ -199,7 +199,6 @@ func (r *HelmReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) } func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease) (v2.HelmRelease, ctrl.Result, error) { - reconcileStart := time.Now() log := ctrl.LoggerFrom(ctx) // Record the value of the reconciliation request, if any if v, ok := meta.ReconcileAnnotationValue(hr.GetAnnotations()); ok { @@ -214,17 +213,6 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease log.Error(updateStatusErr, "unable to update status after generation update") return hr, ctrl.Result{Requeue: true}, updateStatusErr } - // Record progressing status - r.recordReadiness(ctx, hr) - } - - // Record reconciliation duration - if r.MetricsRecorder != nil { - objRef, err := reference.GetReference(r.Scheme, &hr) - if err != nil { - return hr, ctrl.Result{Requeue: true}, err - } - defer r.MetricsRecorder.RecordDuration(*objRef, reconcileStart) } // Reconcile chart based on the HelmChartTemplate @@ -371,8 +359,6 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, log.Error(updateStatusErr, "unable to update status after state update") return hr, updateStatusErr } - // Record progressing status - r.recordReadiness(ctx, hr) } // Check status of any previous release attempt. @@ -670,12 +656,11 @@ func (r *HelmReleaseReconciler) composeValues(ctx context.Context, hr v2.HelmRel // and uninstalls the Helm release if the resource has not been suspended. // It only performs a Helm uninstall if the ServiceAccount to be impersonated // exists. -func (r *HelmReleaseReconciler) reconcileDelete(ctx context.Context, hr v2.HelmRelease) (ctrl.Result, error) { - r.recordReadiness(ctx, hr) +func (r *HelmReleaseReconciler) reconcileDelete(ctx context.Context, hr *v2.HelmRelease) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) // Delete the HelmChart that belongs to this resource. - if err := r.deleteHelmChart(ctx, &hr); err != nil { + if err := r.deleteHelmChart(ctx, hr); err != nil { return ctrl.Result{}, err } @@ -693,7 +678,7 @@ func (r *HelmReleaseReconciler) reconcileDelete(ctx context.Context, hr v2.HelmR ) if impersonator.CanImpersonate(ctx) { - getter, err := r.buildRESTClientGetter(ctx, hr) + getter, err := r.buildRESTClientGetter(ctx, *hr) if err != nil { return ctrl.Result{}, err } @@ -701,7 +686,7 @@ func (r *HelmReleaseReconciler) reconcileDelete(ctx context.Context, hr v2.HelmR if err != nil { return ctrl.Result{}, err } - if err := run.Uninstall(hr); err != nil && !errors.Is(err, driver.ErrReleaseNotFound) { + if err := run.Uninstall(*hr); err != nil && !errors.Is(err, driver.ErrReleaseNotFound) { return ctrl.Result{}, err } log.Info("uninstalled Helm release for deleted resource") @@ -709,15 +694,15 @@ func (r *HelmReleaseReconciler) reconcileDelete(ctx context.Context, hr v2.HelmR err := fmt.Errorf("failed to find service account to impersonate") msg := "skipping Helm uninstall" log.Error(err, msg) - r.event(ctx, hr, hr.Status.LastAppliedRevision, eventv1.EventSeverityError, fmt.Sprintf("%s: %s", msg, err.Error())) + r.event(ctx, *hr, hr.Status.LastAppliedRevision, eventv1.EventSeverityError, fmt.Sprintf("%s: %s", msg, err.Error())) } } else { ctrl.LoggerFrom(ctx).Info("skipping Helm uninstall for suspended resource") } // Remove our finalizer from the list and update it. - controllerutil.RemoveFinalizer(&hr, v2.HelmReleaseFinalizer) - if err := r.Update(ctx, &hr); err != nil { + controllerutil.RemoveFinalizer(hr, v2.HelmReleaseFinalizer) + if err := r.Update(ctx, hr); err != nil { return ctrl.Result{}, err } @@ -817,42 +802,3 @@ func (r *HelmReleaseReconciler) event(_ context.Context, hr v2.HelmRelease, revi } r.EventRecorder.AnnotatedEventf(&hr, eventMeta, eventType, severity, msg) } - -func (r *HelmReleaseReconciler) recordSuspension(ctx context.Context, hr v2.HelmRelease) { - if r.MetricsRecorder == nil { - return - } - log := ctrl.LoggerFrom(ctx) - - objRef, err := reference.GetReference(r.Scheme, &hr) - if err != nil { - log.Error(err, "unable to record suspended metric") - return - } - - if !hr.DeletionTimestamp.IsZero() { - r.MetricsRecorder.RecordSuspend(*objRef, false) - } else { - r.MetricsRecorder.RecordSuspend(*objRef, hr.Spec.Suspend) - } -} - -func (r *HelmReleaseReconciler) recordReadiness(ctx context.Context, hr v2.HelmRelease) { - if r.MetricsRecorder == nil { - return - } - - objRef, err := reference.GetReference(r.Scheme, &hr) - if err != nil { - ctrl.LoggerFrom(ctx).Error(err, "unable to record readiness metric") - return - } - if rc := apimeta.FindStatusCondition(hr.Status.Conditions, meta.ReadyCondition); rc != nil { - r.MetricsRecorder.RecordCondition(*objRef, *rc, !hr.DeletionTimestamp.IsZero()) - } else { - r.MetricsRecorder.RecordCondition(*objRef, metav1.Condition{ - Type: meta.ReadyCondition, - Status: metav1.ConditionUnknown, - }, !hr.DeletionTimestamp.IsZero()) - } -} diff --git a/main.go b/main.go index e89ca3403..a9ea78c13 100644 --- a/main.go +++ b/main.go @@ -34,7 +34,6 @@ import ( ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" ctrlcfg "sigs.k8s.io/controller-runtime/pkg/config" - crtlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" "github.com/fluxcd/pkg/runtime/acl" "github.com/fluxcd/pkg/runtime/client" @@ -143,9 +142,6 @@ func main() { os.Exit(1) } - metricsRecorder := metrics.NewRecorder() - crtlmetrics.Registry.MustRegister(metricsRecorder.Collectors()...) - if err := intervalJitterOptions.SetGlobalJitter(nil); err != nil { setupLog.Error(err, "unable to set global jitter") os.Exit(1) @@ -217,6 +213,7 @@ func main() { probes.SetupChecks(mgr, setupLog) pprof.SetupHandlers(mgr, setupLog) + metricsH := helper.NewMetrics(mgr, metrics.MustMakeRecorder(), v2.HelmReleaseFinalizer) var eventRecorder *events.Recorder if eventRecorder, err = events.NewRecorder(mgr, ctrl.Log, eventsAddr, controllerName); err != nil { setupLog.Error(err, "unable to create event recorder") @@ -246,7 +243,7 @@ func main() { Config: mgr.GetConfig(), Scheme: mgr.GetScheme(), EventRecorder: eventRecorder, - MetricsRecorder: metricsRecorder, + Metrics: metricsH, NoCrossNamespaceRef: aclOptions.NoCrossNamespaceRefs, ClientOpts: clientOptions, KubeConfigOpts: kubeConfigOpts,