From 3074af336cb6aa29ba55d2160e5112ff6f706891 Mon Sep 17 00:00:00 2001 From: Slavik Panasovets Date: Fri, 19 May 2023 22:07:15 +0000 Subject: [PATCH] Reset dualstack metrics before exporting, move them to separate file --- pkg/metrics/l4_dualstack_metrics.go | 174 +++++++++++++++ pkg/metrics/l4_dualstack_metrics_test.go | 271 +++++++++++++++++++++++ pkg/metrics/l4metrics_test.go | 262 ---------------------- pkg/metrics/metrics.go | 151 +------------ 4 files changed, 447 insertions(+), 411 deletions(-) create mode 100644 pkg/metrics/l4_dualstack_metrics.go create mode 100644 pkg/metrics/l4_dualstack_metrics_test.go diff --git a/pkg/metrics/l4_dualstack_metrics.go b/pkg/metrics/l4_dualstack_metrics.go new file mode 100644 index 0000000000..4470632ef6 --- /dev/null +++ b/pkg/metrics/l4_dualstack_metrics.go @@ -0,0 +1,174 @@ +package metrics + +import ( + "strings" + "time" + + "github.com/prometheus/client_golang/prometheus" + corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" +) + +var ( + l4ILBDualStackCount = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "number_of_l4_dual_stack_ilbs", + Help: "Number of L4 ILBs with DualStack enabled", + }, + []string{"ip_families", "ip_family_policy", "status"}, + ) + l4NetLBDualStackCount = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "number_of_l4_dual_stack_netlbs", + Help: "Number of L4 NetLBs with DualStack enabled", + }, + []string{"ip_families", "ip_family_policy", "status"}, + ) +) + +func InitServiceDualStackMetricsState(svc *corev1.Service, startTime *time.Time) L4DualStackServiceState { + state := L4DualStackServiceState{} + + state.IPFamilies = ipFamiliesToString(svc.Spec.IPFamilies) + + state.IPFamilyPolicy = "" + if svc.Spec.IPFamilyPolicy != nil { + state.IPFamilyPolicy = string(*svc.Spec.IPFamilyPolicy) + } + + // Always init status with error, and update with Success when service was provisioned + state.Status = StatusError + state.FirstSyncErrorTime = startTime + return state +} + +func (im *ControllerMetrics) exportL4ILBDualStackMetrics() { + // need to reset, otherwise metrics counted on previous exports will be still stored in a prometheus state + l4ILBDualStackCount.Reset() + + ilbDualStackCount := im.computeL4ILBDualStackMetrics() + klog.V(3).Infof("Exporting L4 ILB DualStack usage metrics: %#v", ilbDualStackCount) + for state, count := range ilbDualStackCount { + l4ILBDualStackCount.With(prometheus.Labels{ + "ip_families": state.IPFamilies, + "ip_family_policy": state.IPFamilyPolicy, + "status": string(state.Status), + }).Set(float64(count)) + } + klog.V(3).Infof("L4 ILB DualStack usage metrics exported.") +} + +func (im *ControllerMetrics) exportL4NetLBDualStackMetrics() { + // need to reset, otherwise metrics counted on previous exports will be still stored in a prometheus state + l4NetLBDualStackCount.Reset() + + netlbDualStackCount := im.computeL4NetLBDualStackMetrics() + klog.V(3).Infof("Exporting L4 NetLB DualStack usage metrics: %#v", netlbDualStackCount) + for state, count := range netlbDualStackCount { + l4NetLBDualStackCount.With(prometheus.Labels{ + "ip_families": state.IPFamilies, + "ip_family_policy": state.IPFamilyPolicy, + "status": string(state.Status), + }).Set(float64(count)) + } + klog.V(3).Infof("L4 Netlb DualStack usage metrics exported.") +} + +// SetL4ILBDualStackService implements L4ILBMetricsCollector. +func (im *ControllerMetrics) SetL4ILBDualStackService(svcKey string, state L4DualStackServiceState) { + im.Lock() + defer im.Unlock() + + if im.l4ILBDualStackServiceMap == nil { + klog.Fatalf("L4 ILB DualStack Metrics failed to initialize correctly.") + } + if state.Status == StatusError { + if previousState, ok := im.l4ILBDualStackServiceMap[svcKey]; ok && previousState.FirstSyncErrorTime != nil { + // If service is in Error state and retry timestamp was set then do not update it. + state.FirstSyncErrorTime = previousState.FirstSyncErrorTime + } + } + im.l4ILBDualStackServiceMap[svcKey] = state +} + +// DeleteL4ILBDualStackService implements L4ILBMetricsCollector. +func (im *ControllerMetrics) DeleteL4ILBDualStackService(svcKey string) { + im.Lock() + defer im.Unlock() + + delete(im.l4ILBDualStackServiceMap, svcKey) +} + +// SetL4NetLBDualStackService implements L4NetLBMetricsCollector. +func (im *ControllerMetrics) SetL4NetLBDualStackService(svcKey string, state L4DualStackServiceState) { + im.Lock() + defer im.Unlock() + + if im.l4NetLBDualStackServiceMap == nil { + klog.Fatalf("L4 NetLB DualStack Metrics failed to initialize correctly.") + } + + if state.Status == StatusError { + if previousState, ok := im.l4NetLBDualStackServiceMap[svcKey]; ok && previousState.FirstSyncErrorTime != nil { + // If service is in Error state and retry timestamp was set then do not update it. + state.FirstSyncErrorTime = previousState.FirstSyncErrorTime + } + } + im.l4NetLBDualStackServiceMap[svcKey] = state +} + +// DeleteL4NetLBDualStackService implements L4NetLBMetricsCollector. +func (im *ControllerMetrics) DeleteL4NetLBDualStackService(svcKey string) { + im.Lock() + defer im.Unlock() + + delete(im.l4NetLBDualStackServiceMap, svcKey) +} + +// computeL4ILBDualStackMetrics aggregates L4 ILB DualStack metrics in the cache. +func (im *ControllerMetrics) computeL4ILBDualStackMetrics() map[L4DualStackServiceLabels]int { + im.Lock() + defer im.Unlock() + klog.V(4).Infof("Computing L4 DualStack ILB usage metrics from service state map: %#v", im.l4ILBDualStackServiceMap) + counts := map[L4DualStackServiceLabels]int{} + + for key, state := range im.l4ILBDualStackServiceMap { + klog.V(6).Infof("ILB Service %s has IPFamilies: %v, IPFamilyPolicy: %t, Status: %v", key, state.IPFamilies, state.IPFamilyPolicy, state.Status) + if state.Status == StatusError && + state.FirstSyncErrorTime != nil && + time.Since(*state.FirstSyncErrorTime) >= persistentErrorThresholdTime { + state.Status = StatusPersistentError + } + counts[state.L4DualStackServiceLabels]++ + } + klog.V(4).Info("L4 ILB usage metrics computed.") + return counts +} + +// computeL4NetLBDualStackMetrics aggregates L4 NetLB DualStack metrics in the cache. +func (im *ControllerMetrics) computeL4NetLBDualStackMetrics() map[L4DualStackServiceLabels]int { + im.Lock() + defer im.Unlock() + klog.V(4).Infof("Computing L4 DualStack NetLB usage metrics from service state map: %#v", im.l4NetLBDualStackServiceMap) + counts := map[L4DualStackServiceLabels]int{} + + for key, state := range im.l4NetLBDualStackServiceMap { + klog.V(6).Infof("NetLB Service %s has IPFamilies: %v, IPFamilyPolicy: %t, Status: %v", key, state.IPFamilies, state.IPFamilyPolicy, state.Status) + if state.Status == StatusError && + state.FirstSyncErrorTime != nil && + time.Since(*state.FirstSyncErrorTime) >= persistentErrorThresholdTime { + state.Status = StatusPersistentError + } + counts[state.L4DualStackServiceLabels]++ + } + klog.V(4).Info("L4 NetLB usage metrics computed.") + return counts +} + +func ipFamiliesToString(ipFamilies []corev1.IPFamily) string { + var ipFamiliesStrings []string + for _, ipFamily := range ipFamilies { + ipFamiliesStrings = append(ipFamiliesStrings, string(ipFamily)) + } + return strings.Join(ipFamiliesStrings, ",") +} diff --git a/pkg/metrics/l4_dualstack_metrics_test.go b/pkg/metrics/l4_dualstack_metrics_test.go new file mode 100644 index 0000000000..0d496c32bf --- /dev/null +++ b/pkg/metrics/l4_dualstack_metrics_test.go @@ -0,0 +1,271 @@ +package metrics + +import ( + "fmt" + "testing" + "time" + + "github.com/google/go-cmp/cmp" +) + +func TestComputeL4ILBDualStackMetrics(t *testing.T) { + t.Parallel() + + currTime := time.Now() + before10min := currTime.Add(-10 * time.Minute) + before20min := currTime.Add(-20 * time.Minute) + + for _, tc := range []struct { + desc string + serviceStates []L4DualStackServiceState + expectL4ILBDualStackCount map[L4DualStackServiceLabels]int + }{ + { + desc: "empty input", + serviceStates: []L4DualStackServiceState{}, + expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{}, + }, + { + desc: "one l4 ilb dual-stack service", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4", "SingleStack", StatusSuccess, nil), + }, + expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{"IPv4", "SingleStack", StatusSuccess}: 1, + }, + }, + { + desc: "l4 ilb dual-stack service in error state", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil), + }, + expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 1, + }, + }, + { + desc: "l4 ilb dual-stack service in error state, for 10 minutes", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min), + }, + expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{ + "IPv4", + "SingleStack", + StatusError, + }: 1, + }, + }, + { + desc: "l4 ilb dual-stack service in error state, for 20 minutes", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min), + }, + expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{ + "IPv4", + "SingleStack", + StatusPersistentError, + }: 1, + }, + }, + { + desc: "L4 ILB dual-stack service with IPv4,IPv6 Families", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), + }, + expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 1}, + }, + { + desc: "many l4 ilb dual-stack services", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), + newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil), + newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil), + newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil), + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min), + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min), + }, + expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 2, + L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 2, + L4DualStackServiceLabels{"IPv6", "SingleStack", StatusSuccess}: 2, + L4DualStackServiceLabels{"IPv4", "SingleStack", StatusPersistentError}: 1, + }, + }, + } { + tc := tc + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + newMetrics := FakeControllerMetrics() + for i, serviceState := range tc.serviceStates { + newMetrics.SetL4ILBDualStackService(fmt.Sprint(i), serviceState) + } + got := newMetrics.computeL4ILBDualStackMetrics() + if diff := cmp.Diff(tc.expectL4ILBDualStackCount, got); diff != "" { + t.Fatalf("Got diff for L4 ILB Dual-Stack service counts (-want +got):\n%s", diff) + } + }) + } +} + +func TestComputeL4NetLBDualStackMetrics(t *testing.T) { + t.Parallel() + + currTime := time.Now() + before10min := currTime.Add(-10 * time.Minute) + before20min := currTime.Add(-20 * time.Minute) + + for _, tc := range []struct { + desc string + serviceStates []L4DualStackServiceState + expectL4NetLBDualStackCount map[L4DualStackServiceLabels]int + }{ + { + desc: "empty input", + serviceStates: []L4DualStackServiceState{}, + expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{}, + }, + { + desc: "one l4 NetLB dual-stack service", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4", "SingleStack", StatusSuccess, nil), + }, + expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{"IPv4", "SingleStack", StatusSuccess}: 1, + }, + }, + { + desc: "l4 NetLB dual-stack service in error state", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil), + }, + expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 1, + }, + }, + { + desc: "l4 NetLB dual-stack service in error state, for 10 minutes", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min), + }, + expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 1, + }, + }, + { + desc: "l4 NetLB dual-stack service in error state, for 20 minutes", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min), + }, + expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{"IPv4", "SingleStack", StatusPersistentError}: 1, + }, + }, + { + desc: "L4 NetLB dual-stack service with IPv4,IPv6 Families", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), + }, + expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 1, + }, + }, + { + desc: "many l4 NetLB dual-stack services", + serviceStates: []L4DualStackServiceState{ + newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), + newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil), + newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil), + newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil), + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min), + newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min), + }, + expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ + L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 2, + L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 2, + L4DualStackServiceLabels{"IPv6", "SingleStack", StatusSuccess}: 2, + L4DualStackServiceLabels{"IPv4", "SingleStack", StatusPersistentError}: 1, + }, + }, + } { + tc := tc + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + newMetrics := FakeControllerMetrics() + for i, serviceState := range tc.serviceStates { + newMetrics.SetL4NetLBDualStackService(fmt.Sprint(i), serviceState) + } + got := newMetrics.computeL4NetLBDualStackMetrics() + if diff := cmp.Diff(tc.expectL4NetLBDualStackCount, got); diff != "" { + t.Fatalf("Got diff for L4 NetLB Dual-Stack service counts (-want +got):\n%s", diff) + } + }) + } +} + +func TestRetryPeriodForL4ILBDualStackServices(t *testing.T) { + t.Parallel() + currTime := time.Now() + before5min := currTime.Add(-5 * time.Minute) + + svcName1 := "svc1" + metrics := FakeControllerMetrics() + + errorState := newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &currTime) + metrics.SetL4ILBDualStackService(svcName1, errorState) + + // change FirstSyncErrorTime and verify it will not change metrics state + errorState.FirstSyncErrorTime = &before5min + metrics.SetL4ILBDualStackService(svcName1, errorState) + state, ok := metrics.l4ILBDualStackServiceMap[svcName1] + if !ok { + t.Fatalf("state should be set") + } + if *state.FirstSyncErrorTime != currTime { + t.Errorf("FirstSyncErrorTime should not change, expected %v, got %v", currTime, *state.FirstSyncErrorTime) + } + if state.Status != StatusError { + t.Errorf("Expected status %s, got %s", StatusError, state.Status) + } +} + +func TestRetryPeriodForL4NetLBDualStackServices(t *testing.T) { + t.Parallel() + currTime := time.Now() + before5min := currTime.Add(-5 * time.Minute) + + svcName1 := "svc1" + metrics := FakeControllerMetrics() + + errorState := newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &currTime) + metrics.SetL4NetLBDualStackService(svcName1, errorState) + + // change FirstSyncErrorTime and verify it will not change metrics state + errorState.FirstSyncErrorTime = &before5min + metrics.SetL4NetLBDualStackService(svcName1, errorState) + state, ok := metrics.l4NetLBDualStackServiceMap[svcName1] + if !ok { + t.Fatalf("state should be set") + } + if *state.FirstSyncErrorTime != currTime { + t.Errorf("FirstSyncErrorTime should not change, expected %v, got %v", currTime, *state.FirstSyncErrorTime) + } + if state.Status != StatusError { + t.Errorf("Expected status %s, got %s", StatusError, state.Status) + } +} + +func newL4DualStackServiceState(ipFamilies string, ipFamilyPolicy string, status L4DualStackServiceStatus, firstSyncErrorTime *time.Time) L4DualStackServiceState { + return L4DualStackServiceState{ + L4DualStackServiceLabels: L4DualStackServiceLabels{ + IPFamilies: ipFamilies, + IPFamilyPolicy: ipFamilyPolicy, + Status: status, + }, + FirstSyncErrorTime: firstSyncErrorTime, + } +} diff --git a/pkg/metrics/l4metrics_test.go b/pkg/metrics/l4metrics_test.go index 678fc34d13..5fddc9ed53 100644 --- a/pkg/metrics/l4metrics_test.go +++ b/pkg/metrics/l4metrics_test.go @@ -379,102 +379,6 @@ func TestComputeL4NetLBMetrics(t *testing.T) { } } -func TestComputeL4NetLBDualStackMetrics(t *testing.T) { - t.Parallel() - - currTime := time.Now() - before10min := currTime.Add(-10 * time.Minute) - before20min := currTime.Add(-20 * time.Minute) - - for _, tc := range []struct { - desc string - serviceStates []L4DualStackServiceState - expectL4NetLBDualStackCount map[L4DualStackServiceLabels]int - }{ - { - desc: "empty input", - serviceStates: []L4DualStackServiceState{}, - expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{}, - }, - { - desc: "one l4 NetLB dual-stack service", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4", "SingleStack", StatusSuccess, nil), - }, - expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{"IPv4", "SingleStack", StatusSuccess}: 1, - }, - }, - { - desc: "l4 NetLB dual-stack service in error state", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil), - }, - expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 1, - }, - }, - { - desc: "l4 NetLB dual-stack service in error state, for 10 minutes", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min), - }, - expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 1, - }, - }, - { - desc: "l4 NetLB dual-stack service in error state, for 20 minutes", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min), - }, - expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{"IPv4", "SingleStack", StatusPersistentError}: 1, - }, - }, - { - desc: "L4 NetLB dual-stack service with IPv4,IPv6 Families", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), - }, - expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 1, - }, - }, - { - desc: "many l4 NetLB dual-stack services", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), - newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil), - newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil), - newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil), - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min), - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min), - }, - expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 2, - L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 2, - L4DualStackServiceLabels{"IPv6", "SingleStack", StatusSuccess}: 2, - L4DualStackServiceLabels{"IPv4", "SingleStack", StatusPersistentError}: 1, - }, - }, - } { - tc := tc - t.Run(tc.desc, func(t *testing.T) { - t.Parallel() - newMetrics := FakeControllerMetrics() - for i, serviceState := range tc.serviceStates { - newMetrics.SetL4NetLBDualStackService(fmt.Sprint(i), serviceState) - } - got := newMetrics.computeL4NetLBDualStackMetrics() - if diff := cmp.Diff(tc.expectL4NetLBDualStackCount, got); diff != "" { - t.Fatalf("Got diff for L4 NetLB Dual-Stack service counts (-want +got):\n%s", diff) - } - }) - } -} - func newL4NetLBServiceState(inSuccess, managed, premium, userError bool, errorTimestamp *time.Time) L4NetLBServiceState { return L4NetLBServiceState{ IsPremiumTier: premium, @@ -516,58 +420,6 @@ func TestRetryPeriodForL4NetLBServices(t *testing.T) { } } -func TestRetryPeriodForL4ILBDualStackServices(t *testing.T) { - t.Parallel() - currTime := time.Now() - before5min := currTime.Add(-5 * time.Minute) - - svcName1 := "svc1" - metrics := FakeControllerMetrics() - - errorState := newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &currTime) - metrics.SetL4ILBDualStackService(svcName1, errorState) - - // change FirstSyncErrorTime and verify it will not change metrics state - errorState.FirstSyncErrorTime = &before5min - metrics.SetL4ILBDualStackService(svcName1, errorState) - state, ok := metrics.l4ILBDualStackServiceMap[svcName1] - if !ok { - t.Fatalf("state should be set") - } - if *state.FirstSyncErrorTime != currTime { - t.Errorf("FirstSyncErrorTime should not change, expected %v, got %v", currTime, *state.FirstSyncErrorTime) - } - if state.Status != StatusError { - t.Errorf("Expected status %s, got %s", StatusError, state.Status) - } -} - -func TestRetryPeriodForL4NetLBDualStackServices(t *testing.T) { - t.Parallel() - currTime := time.Now() - before5min := currTime.Add(-5 * time.Minute) - - svcName1 := "svc1" - metrics := FakeControllerMetrics() - - errorState := newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &currTime) - metrics.SetL4NetLBDualStackService(svcName1, errorState) - - // change FirstSyncErrorTime and verify it will not change metrics state - errorState.FirstSyncErrorTime = &before5min - metrics.SetL4NetLBDualStackService(svcName1, errorState) - state, ok := metrics.l4NetLBDualStackServiceMap[svcName1] - if !ok { - t.Fatalf("state should be set") - } - if *state.FirstSyncErrorTime != currTime { - t.Errorf("FirstSyncErrorTime should not change, expected %v, got %v", currTime, *state.FirstSyncErrorTime) - } - if state.Status != StatusError { - t.Errorf("Expected status %s, got %s", StatusError, state.Status) - } -} - func checkMetricsComputation(newMetrics *ControllerMetrics, expErrorCount, expSvcCount int) error { got := newMetrics.computeL4NetLBMetrics() if got.inError != expErrorCount { @@ -578,117 +430,3 @@ func checkMetricsComputation(newMetrics *ControllerMetrics, expErrorCount, expSv } return nil } - -func TestComputeL4ILBDualStackMetrics(t *testing.T) { - t.Parallel() - - currTime := time.Now() - before10min := currTime.Add(-10 * time.Minute) - before20min := currTime.Add(-20 * time.Minute) - - for _, tc := range []struct { - desc string - serviceStates []L4DualStackServiceState - expectL4ILBDualStackCount map[L4DualStackServiceLabels]int - }{ - { - desc: "empty input", - serviceStates: []L4DualStackServiceState{}, - expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{}, - }, - { - desc: "one l4 ilb dual-stack service", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4", "SingleStack", StatusSuccess, nil), - }, - expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{"IPv4", "SingleStack", StatusSuccess}: 1, - }, - }, - { - desc: "l4 ilb dual-stack service in error state", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil), - }, - expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 1, - }, - }, - { - desc: "l4 ilb dual-stack service in error state, for 10 minutes", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min), - }, - expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{ - "IPv4", - "SingleStack", - StatusError, - }: 1, - }, - }, - { - desc: "l4 ilb dual-stack service in error state, for 20 minutes", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min), - }, - expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{ - "IPv4", - "SingleStack", - StatusPersistentError, - }: 1, - }, - }, - { - desc: "L4 ILB dual-stack service with IPv4,IPv6 Families", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), - }, - expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 1}, - }, - { - desc: "many l4 ilb dual-stack services", - serviceStates: []L4DualStackServiceState{ - newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), - newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil), - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil), - newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil), - newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil), - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min), - newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min), - }, - expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{ - L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 2, - L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 2, - L4DualStackServiceLabels{"IPv6", "SingleStack", StatusSuccess}: 2, - L4DualStackServiceLabels{"IPv4", "SingleStack", StatusPersistentError}: 1, - }, - }, - } { - tc := tc - t.Run(tc.desc, func(t *testing.T) { - t.Parallel() - newMetrics := FakeControllerMetrics() - for i, serviceState := range tc.serviceStates { - newMetrics.SetL4ILBDualStackService(fmt.Sprint(i), serviceState) - } - got := newMetrics.computeL4ILBDualStackMetrics() - if diff := cmp.Diff(tc.expectL4ILBDualStackCount, got); diff != "" { - t.Fatalf("Got diff for L4 ILB Dual-Stack service counts (-want +got):\n%s", diff) - } - }) - } -} - -func newL4DualStackServiceState(ipFamilies string, ipFamilyPolicy string, status L4DualStackServiceStatus, firstSyncErrorTime *time.Time) L4DualStackServiceState { - return L4DualStackServiceState{ - L4DualStackServiceLabels: L4DualStackServiceLabels{ - IPFamilies: ipFamilies, - IPFamilyPolicy: ipFamilyPolicy, - Status: status, - }, - FirstSyncErrorTime: firstSyncErrorTime, - } -} diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index e697d30eab..e85b0df701 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -19,12 +19,10 @@ package metrics import ( "fmt" "os" - "strings" "sync" "time" "github.com/prometheus/client_golang/prometheus" - corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/networking/v1" "k8s.io/apimachinery/pkg/util/wait" frontendconfigv1beta1 "k8s.io/ingress-gce/pkg/apis/frontendconfig/v1beta1" @@ -72,20 +70,6 @@ var ( }, []string{label}, ) - l4ILBDualStackCount = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "number_of_l4_dual_stack_ilbs", - Help: "Number of L4 ILBs with DualStack enabled", - }, - []string{"ip_families", "ip_family_policy", "status"}, - ) - l4NetLBDualStackCount = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "number_of_l4_dual_stack_netlbs", - Help: "Number of L4 NetLBs with DualStack enabled", - }, - []string{"ip_families", "ip_family_policy", "status"}, - ) l4NetLBCount = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "number_of_l4_netlbs", @@ -301,31 +285,6 @@ func (im *ControllerMetrics) DeleteL4ILBService(svcKey string) { delete(im.l4ILBServiceMap, svcKey) } -// SetL4ILBDualStackService implements L4ILBMetricsCollector. -func (im *ControllerMetrics) SetL4ILBDualStackService(svcKey string, state L4DualStackServiceState) { - im.Lock() - defer im.Unlock() - - if im.l4ILBDualStackServiceMap == nil { - klog.Fatalf("L4 ILB DualStack Metrics failed to initialize correctly.") - } - if state.Status == StatusError { - if previousState, ok := im.l4ILBDualStackServiceMap[svcKey]; ok && previousState.FirstSyncErrorTime != nil { - // If service is in Error state and retry timestamp was set then do not update it. - state.FirstSyncErrorTime = previousState.FirstSyncErrorTime - } - } - im.l4ILBDualStackServiceMap[svcKey] = state -} - -// DeleteL4ILBDualStackService implements L4ILBMetricsCollector. -func (im *ControllerMetrics) DeleteL4ILBDualStackService(svcKey string) { - im.Lock() - defer im.Unlock() - - delete(im.l4ILBDualStackServiceMap, svcKey) -} - // SetL4NetLBService adds metric state for given service to map. func (im *ControllerMetrics) SetL4NetLBService(svcKey string, state L4NetLBServiceState) { im.Lock() @@ -352,32 +311,6 @@ func (im *ControllerMetrics) DeleteL4NetLBService(svcKey string) { delete(im.l4NetLBServiceMap, svcKey) } -// SetL4NetLBDualStackService implements L4NetLBMetricsCollector. -func (im *ControllerMetrics) SetL4NetLBDualStackService(svcKey string, state L4DualStackServiceState) { - im.Lock() - defer im.Unlock() - - if im.l4NetLBDualStackServiceMap == nil { - klog.Fatalf("L4 NetLB DualStack Metrics failed to initialize correctly.") - } - - if state.Status == StatusError { - if previousState, ok := im.l4NetLBDualStackServiceMap[svcKey]; ok && previousState.FirstSyncErrorTime != nil { - // If service is in Error state and retry timestamp was set then do not update it. - state.FirstSyncErrorTime = previousState.FirstSyncErrorTime - } - } - im.l4NetLBDualStackServiceMap[svcKey] = state -} - -// DeleteL4NetLBDualStackService implements L4NetLBMetricsCollector. -func (im *ControllerMetrics) DeleteL4NetLBDualStackService(svcKey string) { - im.Lock() - defer im.Unlock() - - delete(im.l4NetLBDualStackServiceMap, svcKey) -} - // SetServiceAttachment adds sa state to the map to be counted during metrics computation. // SetServiceAttachment implements PSCMetricsCollector. func (im *ControllerMetrics) SetServiceAttachment(saKey string, state pscmetrics.PSCState) { @@ -446,33 +379,13 @@ func (im *ControllerMetrics) export() { } klog.V(3).Infof("L4 ILB usage metrics exported.") - ilbDualStackCount := im.computeL4ILBDualStackMetrics() - klog.V(3).Infof("Exporting L4 ILB DualStack usage metrics: %#v", ilbDualStackCount) - for state, count := range ilbDualStackCount { - l4ILBDualStackCount.With(prometheus.Labels{ - "ip_families": state.IPFamilies, - "ip_family_policy": state.IPFamilyPolicy, - "status": string(state.Status), - }).Set(float64(count)) - } - klog.V(3).Infof("L4 ILB DualStack usage metrics exported.") - netlbCount := im.computeL4NetLBMetrics() klog.V(3).Infof("Exporting L4 NetLB usage metrics: %#v", netlbCount) netlbCount.record() - klog.V(3).Infof("L4 NetLB usage metrics exported.") - netlbDualStackCount := im.computeL4NetLBDualStackMetrics() - klog.V(3).Infof("Exporting L4 NetLB DualStack usage metrics: %#v", netlbDualStackCount) - for state, count := range netlbDualStackCount { - l4NetLBDualStackCount.With(prometheus.Labels{ - "ip_families": state.IPFamilies, - "ip_family_policy": state.IPFamilyPolicy, - "status": string(state.Status), - }).Set(float64(count)) - } - klog.V(3).Infof("L4 Netlb DualStack usage metrics exported.") + im.exportL4ILBDualStackMetrics() + im.exportL4NetLBDualStackMetrics() saCount := im.computePSCMetrics() klog.V(3).Infof("Exporting PSC Usage Metrics: %#v", saCount) @@ -626,26 +539,6 @@ func (im *ControllerMetrics) computeL4ILBMetrics() map[feature]int { return counts } -// computeL4ILBDualStackMetrics aggregates L4 ILB DualStack metrics in the cache. -func (im *ControllerMetrics) computeL4ILBDualStackMetrics() map[L4DualStackServiceLabels]int { - im.Lock() - defer im.Unlock() - klog.V(4).Infof("Computing L4 DualStack ILB usage metrics from service state map: %#v", im.l4ILBDualStackServiceMap) - counts := map[L4DualStackServiceLabels]int{} - - for key, state := range im.l4ILBDualStackServiceMap { - klog.V(6).Infof("ILB Service %s has IPFamilies: %v, IPFamilyPolicy: %t, Status: %v", key, state.IPFamilies, state.IPFamilyPolicy, state.Status) - if state.Status == StatusError && - state.FirstSyncErrorTime != nil && - time.Since(*state.FirstSyncErrorTime) >= persistentErrorThresholdTime { - state.Status = StatusPersistentError - } - counts[state.L4DualStackServiceLabels]++ - } - klog.V(4).Info("L4 ILB usage metrics computed.") - return counts -} - // computeL4NetLBMetrics aggregates L4 NetLB metrics in the cache. func (im *ControllerMetrics) computeL4NetLBMetrics() netLBFeatureCount { im.Lock() @@ -680,26 +573,6 @@ func (im *ControllerMetrics) computeL4NetLBMetrics() netLBFeatureCount { return counts } -// computeL4NetLBDualStackMetrics aggregates L4 NetLB DualStack metrics in the cache. -func (im *ControllerMetrics) computeL4NetLBDualStackMetrics() map[L4DualStackServiceLabels]int { - im.Lock() - defer im.Unlock() - klog.V(4).Infof("Computing L4 DualStack NetLB usage metrics from service state map: %#v", im.l4NetLBDualStackServiceMap) - counts := map[L4DualStackServiceLabels]int{} - - for key, state := range im.l4NetLBDualStackServiceMap { - klog.V(6).Infof("NetLB Service %s has IPFamilies: %v, IPFamilyPolicy: %t, Status: %v", key, state.IPFamilies, state.IPFamilyPolicy, state.Status) - if state.Status == StatusError && - state.FirstSyncErrorTime != nil && - time.Since(*state.FirstSyncErrorTime) >= persistentErrorThresholdTime { - state.Status = StatusPersistentError - } - counts[state.L4DualStackServiceLabels]++ - } - klog.V(4).Info("L4 NetLB usage metrics computed.") - return counts -} - func (im *ControllerMetrics) computePSCMetrics() map[feature]int { im.Lock() defer im.Unlock() @@ -808,23 +681,3 @@ func recordComponentVersion() { } componentVersion.WithLabelValues(v).Set(versionValue) } - -func InitServiceDualStackMetricsState(svc *corev1.Service, startTime *time.Time) L4DualStackServiceState { - state := L4DualStackServiceState{} - - var ipFamiliesStrings []string - for _, ipFamily := range svc.Spec.IPFamilies { - ipFamiliesStrings = append(ipFamiliesStrings, string(ipFamily)) - } - state.IPFamilies = strings.Join(ipFamiliesStrings, ",") - - state.IPFamilyPolicy = "" - if svc.Spec.IPFamilyPolicy != nil { - state.IPFamilyPolicy = string(*svc.Spec.IPFamilyPolicy) - } - - // Always init status with error, and update with Success when service was provisioned - state.Status = StatusError - state.FirstSyncErrorTime = startTime - return state -}