Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metrics to controller loops #76

Merged
merged 6 commits into from
Aug 7, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/google/uuid v1.3.0
github.com/hashicorp/go-multierror v1.1.1
github.com/openservicemesh/osm v1.2.3
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/client_model v0.3.0
github.com/prometheus/common v0.39.0
github.com/stretchr/testify v1.8.1
Expand Down Expand Up @@ -60,7 +61,6 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.14.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
github.com/rogpeppe/go-internal v1.8.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
Expand Down
27 changes: 22 additions & 5 deletions pkg/controller/common/cleaner.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"math"
"time"

"github.com/Azure/aks-app-routing-operator/pkg/controller/metrics"
"github.com/go-logr/logr"
"github.com/hashicorp/go-multierror"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -87,23 +88,35 @@ func (c *cleaner) Start(ctx context.Context) error {
}

func (c *cleaner) Clean(ctx context.Context) error {
var err error
defer func() {
//placing this call inside a closure allows for result and err to be bound after tick executes
NickKeller marked this conversation as resolved.
Show resolved Hide resolved
//this makes sure they have the proper value
//just calling defer metrics.HandleControllerReconcileMetrics(controllerName, result, err) would bind
//the values of result and err to their zero values, since they were just instantiated
metrics.HandleControllerReconcileMetrics(c.controllerName(), ctrl.Result{}, err)
}()
if c.retriever == nil {
return errors.New("retriever is nil")
err = errors.New("retriever is nil")
return err
}

types, err := c.retriever(c.mapper)
if err != nil {
return fmt.Errorf("retrieving gvr types: %w", err)
err = fmt.Errorf("retrieving gvr types: %w", err)
return err
}

var result *multierror.Error
for _, t := range types {
if err := c.CleanType(ctx, t); err != nil {
result = multierror.Append(result, fmt.Errorf("cleaning type %s with labels %s: %w", t.gvr.String(), t.labels, err))
if cleanTypeErr := c.CleanType(ctx, t); cleanTypeErr != nil {
result = multierror.Append(result, fmt.Errorf("cleaning type %s with labels %s: %w", t.gvr.String(), t.labels, cleanTypeErr))
}
}

return result.ErrorOrNil()
err = result.ErrorOrNil()
return err

}

func (c *cleaner) CleanType(ctx context.Context, t cleanType) error {
Expand Down Expand Up @@ -168,6 +181,10 @@ func (c *cleaner) NeedLeaderElection() bool {
return true
}

func (c *cleaner) controllerName() string {
return fmt.Sprintf("%s_cleaner", c.name)
NickKeller marked this conversation as resolved.
Show resolved Hide resolved
}

func isNamespaced(clientset kubernetes.Interface, gvr schema.GroupVersionResource) (bool, error) {
res, err := clientset.Discovery().ServerResourcesForGroupVersion(gvr.GroupVersion().String())
if err != nil {
Expand Down
9 changes: 7 additions & 2 deletions pkg/controller/common/resource_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"time"

"github.com/Azure/aks-app-routing-operator/pkg/controller/metrics"
"github.com/Azure/aks-app-routing-operator/pkg/util"
"github.com/go-logr/logr"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -21,6 +22,7 @@ type resourceReconciler struct {

// NewResourceReconciler creates a reconciler that continuously ensures that the provided resources are provisioned
func NewResourceReconciler(manager ctrl.Manager, name string, resources []client.Object, reconcileInterval time.Duration) error {
metrics.InitControllerMetrics(name)
rr := &resourceReconciler{
name: name,
client: manager.GetClient(),
Expand Down Expand Up @@ -55,22 +57,25 @@ func (r *resourceReconciler) Start(ctx context.Context) error {
}

func (r *resourceReconciler) tick(ctx context.Context) error {
var err error
start := time.Now()
r.logger.Info("starting to reconcile resources")
defer func() {
r.logger.Info("finished reconciling resources", "latencySec", time.Since(start).Seconds())

metrics.HandleControllerReconcileMetrics(r.name, ctrl.Result{}, err)
}()

for _, res := range r.resources {
copy := res.DeepCopyObject().(client.Object)
if copy.GetDeletionTimestamp() != nil {
if err := r.client.Delete(ctx, copy); err != nil && !k8serrors.IsNotFound(err) {
if err = r.client.Delete(ctx, copy); err != nil && !k8serrors.IsNotFound(err) {
r.logger.Error(err, "deleting unneeded resources")
}
continue
}

if err := util.Upsert(ctx, r.client, copy); err != nil {
if err = util.Upsert(ctx, r.client, copy); err != nil {
return err
}
}
Expand Down
14 changes: 13 additions & 1 deletion pkg/controller/ingress/concurrency_watchdog.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,14 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/Azure/aks-app-routing-operator/pkg/config"
"github.com/Azure/aks-app-routing-operator/pkg/controller/metrics"
"github.com/Azure/aks-app-routing-operator/pkg/util"
)

const (
concurrencyWatchdogControllerName = "concurrency_watchdog"
)

// ScrapeFn returns the connection count for the given pod
type ScrapeFn func(ctx context.Context, client rest.Interface, pod *corev1.Pod) (float64, error)

Expand Down Expand Up @@ -94,6 +99,7 @@ type ConcurrencyWatchdog struct {
}

func NewConcurrencyWatchdog(manager ctrl.Manager, conf *config.Config, targets []*WatchdogTarget) error {
metrics.InitControllerMetrics(concurrencyWatchdogControllerName)
clientset, err := kubernetes.NewForConfig(manager.GetConfig())
if err != nil {
return err
Expand Down Expand Up @@ -135,11 +141,17 @@ func (c *ConcurrencyWatchdog) Start(ctx context.Context) error {

func (c *ConcurrencyWatchdog) tick(ctx context.Context) error {
start := time.Now()
var retErr *multierror.Error
defer func() {
c.logger.Info("finished checking on ingress controller pods", "latencySec", time.Since(start).Seconds())

//placing this call inside a closure allows for result and err to be bound after tick executes
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should maybe just include this comment once in a README or something similar in the controller dir - that way we don't have to repeat it so frequently.

open to leaving it though if we think that's the cleanest solution, just not a fan of how many times we repeat it

//this makes sure they have the proper value
//just calling defer metrics.HandleControllerReconcileMetrics(controllerName, result, err) would bind
//the values of result and err to their zero values, since they were just instantiated
metrics.HandleControllerReconcileMetrics(concurrencyWatchdogControllerName, ctrl.Result{}, retErr.ErrorOrNil())
}()

var retErr *multierror.Error
for _, target := range c.targets {
c.logger.Info("starting checking on ingress controller pods", "labels", target.PodLabels())

Expand Down
47 changes: 26 additions & 21 deletions pkg/controller/keyvault/event_mirror.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,21 @@ import (
"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
netv1 "k8s.io/api/networking/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/predicate"

"github.com/Azure/aks-app-routing-operator/pkg/config"
"github.com/Azure/aks-app-routing-operator/pkg/controller/metrics"
"github.com/Azure/aks-app-routing-operator/pkg/util"
)

const (
eventMirrorControllerName = "event_mirror"
)

// EventMirror copies events published to pod resources by the Keyvault CSI driver into ingress events.
// This allows users to easily determine why a certificate might be missing for a given ingress.
type EventMirror struct {
Expand All @@ -29,6 +33,7 @@ type EventMirror struct {
}

func NewEventMirror(manager ctrl.Manager, conf *config.Config) error {
metrics.InitControllerMetrics(eventMirrorControllerName)
if conf.DisableKeyvault {
return nil
}
Expand All @@ -44,54 +49,57 @@ func NewEventMirror(manager ctrl.Manager, conf *config.Config) error {
}

func (e *EventMirror) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
var err error
result := ctrl.Result{}

// do metrics
defer func() {
//placing this call inside a closure allows for result and err to be bound after Reconcile executes
//this makes sure they have the proper value
//just calling defer metrics.HandleControllerReconcileMetrics(controllerName, result, err) would bind
//the values of result and err to their zero values, since they were just instantiated
metrics.HandleControllerReconcileMetrics(eventMirrorControllerName, result, err)
}()

logger, err := logr.FromContext(ctx)
if err != nil {
return ctrl.Result{}, err
return result, err
}
logger = logger.WithName("eventMirror")

event := &corev1.Event{}
err = e.client.Get(ctx, req.NamespacedName, event)
if errors.IsNotFound(err) {
return ctrl.Result{}, nil
}
if err != nil {
return ctrl.Result{}, err
return result, client.IgnoreNotFound(err)
}

// Filter to include only keyvault mounting errors
if event.InvolvedObject.Kind != "Pod" ||
event.Reason != "FailedMount" ||
!strings.HasPrefix(event.InvolvedObject.Name, "keyvault-") ||
!strings.Contains(event.Message, "keyvault") {
return ctrl.Result{}, nil
return result, nil
}

// Get the owner (pod)
pod := &corev1.Pod{}
pod.Name = event.InvolvedObject.Name
pod.Namespace = event.InvolvedObject.Namespace
err = e.client.Get(ctx, client.ObjectKeyFromObject(pod), pod)
if errors.IsNotFound(err) {
return ctrl.Result{}, nil
}
if err != nil {
return ctrl.Result{}, err
return result, client.IgnoreNotFound(err)
NickKeller marked this conversation as resolved.
Show resolved Hide resolved
}
if pod.Annotations == nil {
return ctrl.Result{}, nil
return result, nil
}

// Get the owner (ingress)
ingress := &netv1.Ingress{}
ingress.Namespace = pod.Namespace
ingress.Name = pod.Annotations["kubernetes.azure.com/ingress-owner"]
err = e.client.Get(ctx, client.ObjectKeyFromObject(ingress), ingress)
if errors.IsNotFound(err) {
return ctrl.Result{}, nil
}
if err != nil {
return ctrl.Result{}, err
return result, client.IgnoreNotFound(err)
}

// Publish to the service also if ingress is owned by a service
Expand All @@ -100,17 +108,14 @@ func (e *EventMirror) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Res
svc.Namespace = pod.Namespace
svc.Name = name
err = e.client.Get(ctx, client.ObjectKeyFromObject(svc), svc)
if errors.IsNotFound(err) {
return ctrl.Result{}, nil
}
if err != nil {
return ctrl.Result{}, err
return result, client.IgnoreNotFound(err)
}
e.events.Event(svc, "Warning", "FailedMount", event.Message)
}

e.events.Event(ingress, "Warning", "FailedMount", event.Message)
return ctrl.Result{}, nil
return result, nil
}

func (e *EventMirror) newPredicates() predicate.Predicate {
Expand Down
12 changes: 12 additions & 0 deletions pkg/controller/keyvault/event_mirror_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package keyvault

import (
"context"
"github.com/Azure/aks-app-routing-operator/pkg/controller/metrics"
"github.com/Azure/aks-app-routing-operator/pkg/controller/testutils"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: imports

"testing"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -56,9 +58,14 @@ func TestEventMirrorHappyPath(t *testing.T) {
events: recorder,
}

beforeErrCount := testutils.GetErrMetricCount(t, eventMirrorControllerName)
beforeReconcileCount := testutils.GetReconcileMetricCount(t, eventMirrorControllerName, metrics.LabelSuccess)
_, err := e.Reconcile(ctx, req)
require.NoError(t, err)

require.Equal(t, testutils.GetErrMetricCount(t, eventMirrorControllerName), beforeErrCount)
require.Greater(t, testutils.GetReconcileMetricCount(t, eventMirrorControllerName, metrics.LabelSuccess), beforeReconcileCount)

assert.Equal(t, "Warning FailedMount test keyvault event involvedObject{kind=Ingress,apiVersion=networking.k8s.io/v1}", <-recorder.Events)
}

Expand Down Expand Up @@ -107,9 +114,14 @@ func TestEventMirrorServiceOwnerHappyPath(t *testing.T) {
events: recorder,
}

beforeErrCount := testutils.GetErrMetricCount(t, eventMirrorControllerName)
beforeReconcileCount := testutils.GetReconcileMetricCount(t, eventMirrorControllerName, metrics.LabelSuccess)
_, err := e.Reconcile(ctx, req)
require.NoError(t, err)

require.Equal(t, testutils.GetErrMetricCount(t, eventMirrorControllerName), beforeErrCount)
require.Greater(t, testutils.GetReconcileMetricCount(t, eventMirrorControllerName, metrics.LabelSuccess), beforeReconcileCount)

assert.Equal(t, "Warning FailedMount test keyvault event involvedObject{kind=Service,apiVersion=v1}", <-recorder.Events)
assert.Equal(t, "Warning FailedMount test keyvault event involvedObject{kind=Ingress,apiVersion=networking.k8s.io/v1}", <-recorder.Events)
}
Loading