Skip to content

Commit

Permalink
add metrics to track the managed resource count
Browse files Browse the repository at this point in the history
  • Loading branch information
oliviassss committed Jan 29, 2025
1 parent 227315a commit 9700d0e
Show file tree
Hide file tree
Showing 13 changed files with 270 additions and 26 deletions.
1 change: 0 additions & 1 deletion config/webhook/manifests.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
---
apiVersion: admissionregistration.k8s.io/v1
kind: MutatingWebhookConfiguration
metadata:
Expand Down
3 changes: 2 additions & 1 deletion docs/install/iam_policy.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
"elasticloadbalancing:DescribeTags",
"elasticloadbalancing:DescribeTrustStores",
"elasticloadbalancing:DescribeListenerAttributes",
"elasticloadbalancing:DescribeCapacityReservation"
"elasticloadbalancing:DescribeCapacityReservation",
"tag:GetResources"
],
"Resource": "*"
},
Expand Down
3 changes: 2 additions & 1 deletion docs/install/iam_policy_cn.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
"elasticloadbalancing:DescribeTags",
"elasticloadbalancing:DescribeTrustStores",
"elasticloadbalancing:DescribeListenerAttributes",
"elasticloadbalancing:DescribeCapacityReservation"
"elasticloadbalancing:DescribeCapacityReservation",
"tag:GetResources"
],
"Resource": "*"
},
Expand Down
3 changes: 2 additions & 1 deletion docs/install/iam_policy_iso.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
"elasticloadbalancing:DescribeTargetGroups",
"elasticloadbalancing:DescribeTargetGroupAttributes",
"elasticloadbalancing:DescribeTargetHealth",
"elasticloadbalancing:DescribeTags"
"elasticloadbalancing:DescribeTags",
"tag:GetResources"
],
"Resource": "*"
},
Expand Down
3 changes: 2 additions & 1 deletion docs/install/iam_policy_isob.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
"elasticloadbalancing:DescribeTargetGroups",
"elasticloadbalancing:DescribeTargetGroupAttributes",
"elasticloadbalancing:DescribeTargetHealth",
"elasticloadbalancing:DescribeTags"
"elasticloadbalancing:DescribeTags",
"tag:GetResources"
],
"Resource": "*"
},
Expand Down
3 changes: 2 additions & 1 deletion docs/install/iam_policy_isoe.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
"elasticloadbalancing:DescribeTargetGroups",
"elasticloadbalancing:DescribeTargetGroupAttributes",
"elasticloadbalancing:DescribeTargetHealth",
"elasticloadbalancing:DescribeTags"
"elasticloadbalancing:DescribeTags",
"tag:GetResources"
],
"Resource": "*"
},
Expand Down
3 changes: 2 additions & 1 deletion docs/install/iam_policy_isof.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
"elasticloadbalancing:DescribeTargetGroups",
"elasticloadbalancing:DescribeTargetGroupAttributes",
"elasticloadbalancing:DescribeTargetHealth",
"elasticloadbalancing:DescribeTags"
"elasticloadbalancing:DescribeTags",
"tag:GetResources"
],
"Resource": "*"
},
Expand Down
3 changes: 2 additions & 1 deletion docs/install/iam_policy_us-gov.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
"elasticloadbalancing:DescribeTags",
"elasticloadbalancing:DescribeTrustStores",
"elasticloadbalancing:DescribeListenerAttributes",
"elasticloadbalancing:DescribeCapacityReservation"
"elasticloadbalancing:DescribeCapacityReservation",
"tag:GetResources"
],
"Resource": "*"
},
Expand Down
35 changes: 28 additions & 7 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,24 @@ limitations under the License.
package main

import (
"k8s.io/client-go/util/workqueue"
"os"

elbv2deploy "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2"

"github.com/go-logr/logr"
"github.com/spf13/pflag"
zapraw "go.uber.org/zap"
k8sruntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
"os"
elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1"
elbv2controller "sigs.k8s.io/aws-load-balancer-controller/controllers/elbv2"
"sigs.k8s.io/aws-load-balancer-controller/controllers/ingress"
"sigs.k8s.io/aws-load-balancer-controller/controllers/service"
"sigs.k8s.io/aws-load-balancer-controller/pkg/aws"
"sigs.k8s.io/aws-load-balancer-controller/pkg/aws/throttle"
"sigs.k8s.io/aws-load-balancer-controller/pkg/config"
elbv2deploy "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2"
"sigs.k8s.io/aws-load-balancer-controller/pkg/inject"
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s"
awsmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/aws"
Expand Down Expand Up @@ -84,8 +82,6 @@ func main() {
klog.SetLoggerWithOptions(appLogger, klog.ContextualLogger(true))

var awsMetricsCollector *awsmetrics.Collector
lbcMetricsCollector := lbcmetrics.NewCollector(metrics.Registry)

if metrics.Registry != nil {
awsMetricsCollector = awsmetrics.NewCollector(metrics.Registry)
}
Expand All @@ -107,6 +103,17 @@ func main() {
os.Exit(1)
}

// track the k8s resources with finalizers contains "k8s.aws"
// track the aws resources with cluster tag "elbv2.k8s.aws/cluster=$ClusterName"
lbcMetricsCollector := lbcmetrics.NewCollector(
metrics.Registry,
mgr.GetClient(),
cloud.RGT(),
"k8s.aws",
"elbv2.k8s.aws/cluster",
controllerCFG.ClusterName,
)

clientSet, err := kubernetes.NewForConfig(mgr.GetConfig())
if err != nil {
setupLog.Error(err, "unable to obtain clientSet")
Expand Down Expand Up @@ -163,6 +170,19 @@ func main() {
os.Exit(1)
}

// update of the managed resource metrics
go func() {
if err := lbcMetricsCollector.UpdateManagedK8sResourceMetrics(ctx); err != nil {
setupLog.Error(err, "failed to update managed Kubernetes resource metrics")
}
if err := lbcMetricsCollector.UpdateManagedALBMetrics(ctx); err != nil {
setupLog.Error(err, "failed to update managed ALB metrics")
}
if err := lbcMetricsCollector.UpdateManagedNLBMetrics(ctx); err != nil {
setupLog.Error(err, "failed to update managed NLB metrics")
}
}()

// Add liveness probe
err = mgr.AddHealthzCheck("health-ping", healthz.Ping)
setupLog.Info("adding health check for controller")
Expand Down Expand Up @@ -210,6 +230,7 @@ func main() {
setupLog.Error(err, "problem running manager")
os.Exit(1)
}

}

// loadControllerConfig loads the controller configuration.
Expand Down
9 changes: 7 additions & 2 deletions pkg/metrics/aws/instruments.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,13 @@ func newInstruments(registerer prometheus.Registerer) *instruments {
Name: metricAPIRequestDurationSeconds,
Help: "Latency of an individual HTTP request to the service endpoint",
}, []string{labelService, labelOperation})

registerer.MustRegister(apiCallsTotal, apiCallDurationSeconds, apiCallRetries, apiRequestsTotal, apiRequestDurationSecond)
registerer.MustRegister(
apiCallsTotal,
apiCallDurationSeconds,
apiCallRetries,
apiRequestsTotal,
apiRequestDurationSecond,
)
return &instruments{
apiCallsTotal: apiCallsTotal,
apiCallDurationSeconds: apiCallDurationSeconds,
Expand Down
152 changes: 148 additions & 4 deletions pkg/metrics/lbc/collector.go
Original file line number Diff line number Diff line change
@@ -1,33 +1,81 @@
package lbc

import (
"context"
awssdk "github.com/aws/aws-sdk-go-v2/aws"
rgtsdk "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi"
rgttypes "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi/types"
"github.com/prometheus/client_golang/prometheus"
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1"
"sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services"
"strings"

"sigs.k8s.io/controller-runtime/pkg/client"
"time"
)

const (
networkLoadBalancerStr = "nlb"
resourceTypeALB = "elasticloadbalancing:loadbalancer/app"
resourceTypeNLB = "elasticloadbalancing:loadbalancer/net"
)

type MetricCollector interface {
// ObservePodReadinessGateReady this metric is useful to determine how fast pods are becoming ready in the load balancer.
// Due to some architectural constraints, we can only emit this metric for pods that are using readiness gates.
ObservePodReadinessGateReady(namespace string, tgbName string, duration time.Duration)

// UpdateManagedK8sResourceMetrics fetches and updates managed k8s resources metrics.
UpdateManagedK8sResourceMetrics(ctx context.Context) error

// UpdateManagedALBMetrics updates managed ALB count metrics
UpdateManagedALBMetrics(ctx context.Context) error

//UpdateManagedNLBMetrics updates managed NLB count metrics
UpdateManagedNLBMetrics(ctx context.Context) error
}

type collector struct {
instruments *instruments
instruments *instruments
runtimeClient client.Client
rgt services.RGT
finalizerKeyWord string
clusterTagKey string
clusterTagVal string
}

type noOpCollector struct{}

func (n *noOpCollector) ObservePodReadinessGateReady(_ string, _ string, _ time.Duration) {
}

func NewCollector(registerer prometheus.Registerer) MetricCollector {
if registerer == nil {
func (n *noOpCollector) UpdateManagedK8sResourceMetrics(_ context.Context) error {
return nil
}

func (n *noOpCollector) UpdateManagedALBMetrics(_ context.Context) error {
return nil
}

func (n *noOpCollector) UpdateManagedNLBMetrics(_ context.Context) error {
return nil
}

func NewCollector(registerer prometheus.Registerer, runtimeClient client.Client, rgt services.RGT, finalizerKeyWord string, clusterTagKey string, clusterTagVal string) MetricCollector {
if registerer == nil || runtimeClient == nil {
return &noOpCollector{}
}

instruments := newInstruments(registerer)
return &collector{
instruments: instruments,
instruments: instruments,
runtimeClient: runtimeClient,
rgt: rgt,
finalizerKeyWord: finalizerKeyWord,
clusterTagKey: clusterTagKey,
clusterTagVal: clusterTagVal,
}
}

Expand All @@ -37,3 +85,99 @@ func (c *collector) ObservePodReadinessGateReady(namespace string, tgbName strin
labelName: tgbName,
}).Observe(duration.Seconds())
}

func (c *collector) UpdateManagedK8sResourceMetrics(ctx context.Context) error {
listOpts := &client.ListOptions{
Namespace: "",
}
ingressCount, serviceCount, tgbCount := 0, 0, 0
// Fetch ingress count
ingressList := &networkingv1.IngressList{}
err := c.runtimeClient.List(ctx, ingressList, listOpts)
if err != nil {
return err
}
for _, ingress := range ingressList.Items {
for _, finalizer := range ingress.Finalizers {
if strings.Contains(finalizer, c.finalizerKeyWord) {
ingressCount++
break
}
}
}
c.instruments.managedIngressCount.Set(float64(ingressCount))

// Fetch service count
serviceList := &corev1.ServiceList{}
err = c.runtimeClient.List(ctx, serviceList, listOpts)
if err != nil {
return err
}
for _, service := range serviceList.Items {
hasMatchingFinalizer := false
for _, finalizer := range service.Finalizers {
if strings.Contains(finalizer, c.finalizerKeyWord) {
hasMatchingFinalizer = true
break
}
}

if hasMatchingFinalizer && service.Spec.LoadBalancerClass != nil && strings.Contains(*service.Spec.LoadBalancerClass, networkLoadBalancerStr) {
serviceCount++
}
}
c.instruments.managedServiceCount.Set(float64(serviceCount))

// Fetch TargetGroupBinding count
tgbList := &elbv2api.TargetGroupBindingList{}
err = c.runtimeClient.List(ctx, tgbList, listOpts)
if err != nil {
return err
}
for _, tgb := range tgbList.Items {
for _, finalizer := range tgb.Finalizers {
if strings.Contains(finalizer, c.finalizerKeyWord) {
tgbCount++
break
}
}
}
c.instruments.managedTGBCount.Set(float64(tgbCount))

return nil
}

func (c *collector) UpdateManagedALBMetrics(ctx context.Context) error {
count, err := c.getManagedAWSResourceMetrics(ctx, resourceTypeALB)
if err != nil {
return err
}
c.instruments.managedALBCount.Set(float64(count))
return nil
}

func (c *collector) UpdateManagedNLBMetrics(ctx context.Context) error {
count, err := c.getManagedAWSResourceMetrics(ctx, resourceTypeNLB)
if err != nil {
return err
}
c.instruments.managedNLBCount.Set(float64(count))
return nil
}

func (c *collector) getManagedAWSResourceMetrics(ctx context.Context, resourceType string) (count int, err error) {
req := &rgtsdk.GetResourcesInput{
ResourceTypeFilters: []string{resourceType},
TagFilters: []rgttypes.TagFilter{
{
Key: awssdk.String(c.clusterTagKey),
Values: []string{c.clusterTagVal},
},
},
}
resources, err := c.rgt.GetResourcesAsList(ctx, req)
if err != nil {
return 0, err
}
return len(resources), nil
}
Loading

0 comments on commit 9700d0e

Please sign in to comment.