Skip to content

Commit

Permalink
Merge pull request #1685 from kl52752/metrics-review-fixes
Browse files Browse the repository at this point in the history
Review fixes
  • Loading branch information
k8s-ci-robot authored Apr 26, 2022
2 parents 2781c36 + 6a5fcbe commit c82ad9d
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 143 deletions.
28 changes: 16 additions & 12 deletions pkg/l4lb/l4netlbcontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import (
"k8s.io/ingress-gce/pkg/context"
"k8s.io/ingress-gce/pkg/controller/translator"
"k8s.io/ingress-gce/pkg/instances"
l4metrics "k8s.io/ingress-gce/pkg/l4lb/metrics"
"k8s.io/ingress-gce/pkg/l4lb/metrics"
"k8s.io/ingress-gce/pkg/loadbalancers"
"k8s.io/ingress-gce/pkg/utils"
"k8s.io/ingress-gce/pkg/utils/common"
Expand Down Expand Up @@ -267,7 +267,7 @@ func (lc *L4NetLBController) checkHealth() error {
msg := fmt.Sprintf("L4 External LoadBalancer Sync happened at time %v - %v after enqueue time, threshold is %v", lastSyncTime, lastSyncTime.Sub(lastEnqueueTime), enqueueToSyncDelayThreshold)
// Log here, context/http handler do no log the error.
klog.Error(msg)
l4metrics.PublishL4FailedHealthCheckCount(l4NetLBControllerName)
metrics.PublishL4FailedHealthCheckCount(l4NetLBControllerName)
}
return nil
}
Expand Down Expand Up @@ -296,14 +296,13 @@ func (lc *L4NetLBController) sync(key string) error {
klog.V(3).Infof("Ignoring sync of non-existent service %s", key)
return nil
}
namespacedName := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace}.String()
if lc.needsDeletion(svc) {
klog.V(3).Infof("Deleting L4 External LoadBalancer resources for service %s", key)
result := lc.garbageCollectRBSNetLB(key, svc)
if result == nil {
return nil
}
lc.publishMetrics(result, namespacedName)
lc.publishMetrics(result, svc.Name, svc.Namespace)
return result.Error
}

Expand All @@ -313,7 +312,7 @@ func (lc *L4NetLBController) sync(key string) error {
// result will be nil if the service was ignored(due to presence of service controller finalizer).
return nil
}
lc.publishMetrics(result, namespacedName)
lc.publishMetrics(result, svc.Name, svc.Namespace)
return result.Error
}
klog.V(3).Infof("Ignoring sync of service %s, neither delete nor ensure needed.", key)
Expand Down Expand Up @@ -449,24 +448,29 @@ func (lc *L4NetLBController) garbageCollectRBSNetLB(key string, svc *v1.Service)
}

// publishMetrics sets controller metrics for NetLB services and pushes NetLB metrics based on sync type.
func (lc *L4NetLBController) publishMetrics(result *loadbalancers.L4NetLBSyncResult, namespacedName string) {
if result == nil {
return
}
func (lc *L4NetLBController) publishMetrics(result *loadbalancers.L4NetLBSyncResult, name, namespace string) {
namespacedName := types.NamespacedName{Name: name, Namespace: namespace}.String()
switch result.SyncType {
case loadbalancers.SyncTypeCreate, loadbalancers.SyncTypeUpdate:
klog.V(4).Infof("External L4 Loadbalancer for Service %s ensured, updating its state %v in metrics cache", namespacedName, result.MetricsState)
lc.ctx.ControllerMetrics.SetL4NetLBService(namespacedName, result.MetricsState)
l4metrics.PublishNetLBSyncMetrics(result.Error == nil, result.SyncType, result.GCEResourceInError, utils.GetErrorType(result.Error), result.StartTime)

lc.publishSyncMetrics(result)
case loadbalancers.SyncTypeDelete:
// if service is successfully deleted, remove it from cache
if result.Error == nil {
klog.V(4).Infof("External L4 Loadbalancer for Service %s deleted, removing its state from metrics cache", namespacedName)
lc.ctx.ControllerMetrics.DeleteL4NetLBService(namespacedName)
}
l4metrics.PublishNetLBSyncMetrics(result.Error == nil, result.SyncType, result.GCEResourceInError, utils.GetErrorType(result.Error), result.StartTime)
lc.publishSyncMetrics(result)
default:
klog.Warningf("Unknown sync type %q, skipping metrics", result.SyncType)
}
}

func (lc *L4NetLBController) publishSyncMetrics(result *loadbalancers.L4NetLBSyncResult) {
if result.Error == nil {
metrics.PublishL4NetLBSyncSuccess(result.SyncType, result.StartTime)
return
}
metrics.PublishL4NetLBSyncError(result.SyncType, result.GCEResourceInError, utils.GetErrorType(result.Error), result.StartTime)
}
29 changes: 9 additions & 20 deletions pkg/l4lb/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ var (
prometheus.HistogramOpts{
Name: L4netlbLatencyMetricName,
Help: "Latency of an L4 NetLB Sync",
// custom buckets - [15s, 30s, 60s, 120s, 240s(4min), 480s(8min), 960s(16m), +Inf]
Buckets: prometheus.ExponentialBuckets(15, 2, 6),
// custom buckets - [15s, 30s, 60s, 120s, 240s(4min), 480s(8min), 960s(16m), 1920s(32min), 3840s(64min), 7680s(128m) +Inf]
Buckets: prometheus.ExponentialBuckets(15, 2, 10),
},
l4LBSyncLatencyMetricsLabels,
)
Expand Down Expand Up @@ -122,26 +122,15 @@ func publishL4ILBSyncErrorCount(syncType, gceResource, errorType string) {
l4ILBSyncErrorCount.WithLabelValues(syncType, gceResource, errorType).Inc()
}

// PublishL4NetLBSyncMetrics exports metrics related to the L4 NetLB sync.
func PublishNetLBSyncMetrics(success bool, syncType, gceResource, errType string, startTime time.Time) {
publishL4NetLBSyncLatency(success, syncType, startTime)
if !success {
publishL4NetLBSyncErrorCount(syncType, gceResource, errType)
}
}

// publishL4NetLBSyncLatency exports the given sync latency datapoint.
func publishL4NetLBSyncLatency(success bool, syncType string, startTime time.Time) {
status := statusSuccess
if !success {
status = statusError
}
l4NetLBSyncLatency.WithLabelValues(status, syncType).Observe(time.Since(startTime).Seconds())
// PublishL4NetLBSyncSuccess exports latency metrics for L4 NetLB service after successful sync.
func PublishL4NetLBSyncSuccess(syncType string, startTime time.Time) {
l4NetLBSyncLatency.WithLabelValues(statusSuccess, syncType).Observe(time.Since(startTime).Seconds())
}

// publishL4NetLBSyncLatency exports the given sync latency datapoint.
func publishL4NetLBSyncErrorCount(syncType, gceResource, errorType string) {
l4NetLBSyncErrorCount.WithLabelValues(syncType, gceResource, errorType).Inc()
// PublishL4NetLBSyncError exports latency and error count metrics for L4 NetLB after error sync.
func PublishL4NetLBSyncError(syncType, gceResource, errType string, startTime time.Time) {
l4NetLBSyncLatency.WithLabelValues(statusError, syncType).Observe(time.Since(startTime).Seconds())
l4NetLBSyncErrorCount.WithLabelValues(syncType, gceResource, errType).Inc()
}

// PublishL4FailedHealthCheckCount observers failed healt check from controller.
Expand Down
8 changes: 2 additions & 6 deletions pkg/loadbalancers/l4netlb.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,8 @@ func (l4netlb *L4NetLB) EnsureFrontend(nodeNames []string, svc *corev1.Service)
result.Annotations[annotations.UDPForwardingRuleKey] = fr.Name
}
result.Status = &corev1.LoadBalancerStatus{Ingress: []corev1.LoadBalancerIngress{{IP: fr.IPAddress}}}
if fr.NetworkTier == cloud.NetworkTierPremium.ToGCEValue() {
result.MetricsState.IsPremiumTier = true
}
if ipAddrType == IPAddrManaged {
result.MetricsState.IsManagedIP = true
}
result.MetricsState.IsPremiumTier = fr.NetworkTier == cloud.NetworkTierPremium.ToGCEValue()
result.MetricsState.IsManagedIP = ipAddrType == IPAddrManaged
return result
}

Expand Down
100 changes: 42 additions & 58 deletions pkg/metrics/l4metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,74 +182,64 @@ func TestComputeL4NetLBMetrics(t *testing.T) {
for _, tc := range []struct {
desc string
serviceStates []L4NetLBServiceState
expectL4NetLBCount map[feature]int
expectL4NetLBCount netLBFeatureCount
}{
{
desc: "empty input",
serviceStates: []L4NetLBServiceState{},
expectL4NetLBCount: map[feature]int{
l4NetLBService: 0,
l4NetLBManagedStaticIP: 0,
l4NetLBStaticIP: 0,
l4NetLBPremiumNetworkTier: 0,
l4NetLBInSuccess: 0,
l4NetLBInError: 0,
expectL4NetLBCount: netLBFeatureCount{
service: 0,
managedStaticIP: 0,
premiumNetworkTier: 0,
success: 0,
},
},
{
desc: "one l4 Netlb service",
serviceStates: []L4NetLBServiceState{
newL4NetLBServiceState(isSuccess, unmanaged, standard),
},
expectL4NetLBCount: map[feature]int{
l4NetLBService: 1,
l4NetLBManagedStaticIP: 0,
l4NetLBStaticIP: 1,
l4NetLBPremiumNetworkTier: 0,
l4NetLBInSuccess: 1,
l4NetLBInError: 0,
expectL4NetLBCount: netLBFeatureCount{
service: 1,
managedStaticIP: 0,
premiumNetworkTier: 0,
success: 1,
},
},
{
desc: "one l4 Netlb service in premium network tier",
serviceStates: []L4NetLBServiceState{
newL4NetLBServiceState(isSuccess, unmanaged, premium),
},
expectL4NetLBCount: map[feature]int{
l4NetLBService: 1,
l4NetLBManagedStaticIP: 0,
l4NetLBStaticIP: 1,
l4NetLBPremiumNetworkTier: 1,
l4NetLBInSuccess: 1,
l4NetLBInError: 0,
expectL4NetLBCount: netLBFeatureCount{
service: 1,
managedStaticIP: 0,
premiumNetworkTier: 1,
success: 1,
},
},
{
desc: "one l4 Netlb service in premium network tier and managed static ip",
serviceStates: []L4NetLBServiceState{
newL4NetLBServiceState(isSuccess, managed, premium),
},
expectL4NetLBCount: map[feature]int{
l4NetLBService: 1,
l4NetLBManagedStaticIP: 1,
l4NetLBStaticIP: 1,
l4NetLBPremiumNetworkTier: 1,
l4NetLBInSuccess: 1,
l4NetLBInError: 0,
expectL4NetLBCount: netLBFeatureCount{
service: 1,
managedStaticIP: 1,
premiumNetworkTier: 1,
success: 1,
},
},
{
desc: "l4 Netlb service in error state",
serviceStates: []L4NetLBServiceState{
newL4NetLBServiceState(isError, unmanaged, standard),
},
expectL4NetLBCount: map[feature]int{
l4NetLBService: 1,
l4NetLBManagedStaticIP: 0,
l4NetLBStaticIP: 0,
l4NetLBPremiumNetworkTier: 0,
l4NetLBInSuccess: 0,
l4NetLBInError: 1,
expectL4NetLBCount: netLBFeatureCount{
service: 1,
managedStaticIP: 0,
premiumNetworkTier: 0,
success: 0,
},
},
{
Expand All @@ -258,13 +248,11 @@ func TestComputeL4NetLBMetrics(t *testing.T) {
newL4NetLBServiceState(isError, unmanaged, standard),
newL4NetLBServiceState(isError, managed, premium),
},
expectL4NetLBCount: map[feature]int{
l4NetLBService: 2,
l4NetLBManagedStaticIP: 0,
l4NetLBStaticIP: 0,
l4NetLBPremiumNetworkTier: 0,
l4NetLBInSuccess: 0,
l4NetLBInError: 2,
expectL4NetLBCount: netLBFeatureCount{
service: 2,
managedStaticIP: 0,
premiumNetworkTier: 0,
success: 0,
},
},
{
Expand All @@ -273,13 +261,11 @@ func TestComputeL4NetLBMetrics(t *testing.T) {
newL4NetLBServiceState(isSuccess, unmanaged, standard),
newL4NetLBServiceState(isSuccess, unmanaged, premium),
},
expectL4NetLBCount: map[feature]int{
l4NetLBService: 2,
l4NetLBManagedStaticIP: 0,
l4NetLBStaticIP: 2,
l4NetLBPremiumNetworkTier: 1,
l4NetLBInSuccess: 2,
l4NetLBInError: 0,
expectL4NetLBCount: netLBFeatureCount{
service: 2,
managedStaticIP: 0,
premiumNetworkTier: 1,
success: 2,
},
},
{
Expand All @@ -294,13 +280,11 @@ func TestComputeL4NetLBMetrics(t *testing.T) {
newL4NetLBServiceState(isError, managed, premium),
newL4NetLBServiceState(isError, managed, standard),
},
expectL4NetLBCount: map[feature]int{
l4NetLBService: 8,
l4NetLBManagedStaticIP: 3,
l4NetLBStaticIP: 6,
l4NetLBPremiumNetworkTier: 4,
l4NetLBInSuccess: 6,
l4NetLBInError: 2,
expectL4NetLBCount: netLBFeatureCount{
service: 8,
managedStaticIP: 3,
premiumNetworkTier: 4,
success: 6,
},
},
} {
Expand All @@ -312,7 +296,7 @@ func TestComputeL4NetLBMetrics(t *testing.T) {
newMetrics.SetL4NetLBService(fmt.Sprint(i), serviceState)
}
got := newMetrics.computeL4NetLBMetrics()
if diff := cmp.Diff(tc.expectL4NetLBCount, got); diff != "" {
if diff := cmp.Diff(tc.expectL4NetLBCount, got, cmp.AllowUnexported(netLBFeatureCount{})); diff != "" {
t.Fatalf("Got diff for L4 NetLB service counts (-want +got):\n%s", diff)
}
})
Expand Down
42 changes: 24 additions & 18 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,22 @@ var (
)
)

// netLBFeatureCount define metric feature count for L4NetLB controller
type netLBFeatureCount struct {
service int
managedStaticIP int
premiumNetworkTier int
success int
}

func (netlbCount *netLBFeatureCount) record() {
l4NetLBCount.With(prometheus.Labels{label: l4NetLBService.String()}).Set(float64(netlbCount.service))
l4NetLBCount.With(prometheus.Labels{label: l4NetLBStaticIP.String()}).Set(float64(netlbCount.success))
l4NetLBCount.With(prometheus.Labels{label: l4NetLBManagedStaticIP.String()}).Set(float64(netlbCount.managedStaticIP))
l4NetLBCount.With(prometheus.Labels{label: l4NetLBInSuccess.String()}).Set(float64(netlbCount.success))
l4NetLBCount.With(prometheus.Labels{label: l4NetLBInError.String()}).Set(float64(netlbCount.service - netlbCount.success))
}

// init registers ingress usage metrics.
func init() {
klog.V(3).Infof("Registering Ingress usage metrics %v and %v, NEG usage metrics %v", ingressCount, servicePortCount, networkEndpointGroupCount)
Expand Down Expand Up @@ -338,9 +354,8 @@ func (im *ControllerMetrics) export() {

netlbCount := im.computeL4NetLBMetrics()
klog.V(3).Infof("Exporting L4 NetLB usage metrics: %#v", netlbCount)
for feature, count := range netlbCount {
l4NetLBCount.With(prometheus.Labels{label: feature.String()}).Set(float64(count))
}
netlbCount.record()

klog.V(3).Infof("L4 NetLB usage metrics exported.")

saCount := im.computePSCMetrics()
Expand Down Expand Up @@ -491,34 +506,25 @@ func (im *ControllerMetrics) computeL4ILBMetrics() map[feature]int {
}

// computeL4NetLBMetrics aggregates L4 NetLB metrics in the cache.
func (im *ControllerMetrics) computeL4NetLBMetrics() map[feature]int {
func (im *ControllerMetrics) computeL4NetLBMetrics() netLBFeatureCount {
im.Lock()
defer im.Unlock()
klog.V(4).Infof("Computing L4 NetLB usage metrics from service state map: %#v", im.l4NetLBServiceMap)
counts := map[feature]int{
l4NetLBService: 0,
l4NetLBStaticIP: 0,
l4NetLBManagedStaticIP: 0,
l4NetLBPremiumNetworkTier: 0,
l4NetLBInSuccess: 0,
l4NetLBInError: 0,
}
var counts netLBFeatureCount

for key, state := range im.l4NetLBServiceMap {
klog.V(6).Infof("NetLB Service %s has metrics %+v", key, state)
counts[l4NetLBService]++
counts.service++
if !state.InSuccess {
counts[l4NetLBInError]++
// Skip counting other features if the service is in error state.
continue
}
counts[l4NetLBInSuccess]++
counts[l4NetLBStaticIP]++
counts.success++
if state.IsManagedIP {
counts[l4NetLBManagedStaticIP]++
counts.managedStaticIP++
}
if state.IsPremiumTier {
counts[l4NetLBPremiumNetworkTier]++
counts.premiumNetworkTier++
}
}
klog.V(4).Info("L4 NetLB usage metrics computed.")
Expand Down
Loading

0 comments on commit c82ad9d

Please sign in to comment.