Skip to content

Commit

Permalink
Add metrics for endpoint and endpoint slice state
Browse files Browse the repository at this point in the history
Added metrics to collect the state of each endpoint and endpoint
slice. These metrics are only for L7 endpoints.
  • Loading branch information
sawsa307 committed May 2, 2023
1 parent 7bc92b9 commit 65abd66
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 99 deletions.
51 changes: 51 additions & 0 deletions pkg/neg/metrics/endpoint_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"github.com/prometheus/client_golang/prometheus"
)

// Label names attached to the endpoint and endpoint slice state gauges.
const (
	endpointState      = "endpoint_state"
	endpointSliceState = "endpoint_slice_state"
)

// Metric names (without the subsystem prefix) of the endpoint and endpoint
// slice state gauges.
const (
	endpointStateKey      = "neg_sync_endpoint_state"
	endpointSliceStateKey = "neg_sync_endpoint_slice_state"
)

// syncerEndpointState is a gauge vector holding the current number of
// endpoints in each state. The state is carried in the endpointState label.
var syncerEndpointState = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Subsystem: negControllerSubsystem,
		Name:      endpointStateKey,
		Help:      "Current count of endpoints in each state",
	},
	[]string{endpointState},
)

// syncerEndpointSliceState is a gauge vector holding the current number of
// endpoint slices in each state. The state is carried in the
// endpointSliceState label.
var syncerEndpointSliceState = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Subsystem: negControllerSubsystem,
		Name:      endpointSliceStateKey,
		Help:      "Current count of endpoint slices in each state",
	},
	[]string{endpointSliceState},
)
60 changes: 46 additions & 14 deletions pkg/neg/metrics/neg_metrics_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ type SyncerMetricsCollector interface {
type SyncerMetrics struct {
// syncerStatusMap tracks the status of each syncer
syncerStatusMap map[negtypes.NegSyncerKey]negtypes.Reason
// syncerEndpointStateMap is a map between syncer and endpoint state counts
// syncerEndpointStateMap is a map between syncer and endpoint state counts.
syncerEndpointStateMap map[negtypes.NegSyncerKey]negtypes.StateCountMap
// syncerEPSStateMap is a map between syncer and endpoint slice state counts
syncerEPSStateMap map[negtypes.NegSyncerKey]negtypes.StateCountMap
// syncerEndpointSliceStateMap is a map between syncer and endpoint slice state counts.
syncerEndpointSliceStateMap map[negtypes.NegSyncerKey]negtypes.StateCountMap
// syncerLabelProagationStats is a map between syncer and label propagation stats.
syncerLabelProagationStats map[negtypes.NegSyncerKey]LabelPropagationStats
// mu avoids race conditions and ensures correctness of metrics
Expand All @@ -54,12 +54,12 @@ type SyncerMetrics struct {
// NewNegMetricsCollector returns a SyncerMetrics whose per-syncer maps are
// allocated (empty), whose metricsInterval is exportInterval, and whose logger
// is the given logger named "NegMetricsCollector".
// NOTE(review): this function does not start a goroutine itself; periodic
// export is presumably driven by Run — confirm against the caller.
func NewNegMetricsCollector(exportInterval time.Duration, logger klog.Logger) *SyncerMetrics {
return &SyncerMetrics{
syncerStatusMap: make(map[negtypes.NegSyncerKey]negtypes.Reason),
syncerEndpointStateMap: make(map[negtypes.NegSyncerKey]negtypes.StateCountMap),
syncerEPSStateMap: make(map[negtypes.NegSyncerKey]negtypes.StateCountMap),
syncerLabelProagationStats: make(map[negtypes.NegSyncerKey]LabelPropagationStats),
metricsInterval: exportInterval,
logger: logger.WithName("NegMetricsCollector"),
syncerStatusMap: make(map[negtypes.NegSyncerKey]negtypes.Reason),
syncerEndpointStateMap: make(map[negtypes.NegSyncerKey]negtypes.StateCountMap),
syncerEndpointSliceStateMap: make(map[negtypes.NegSyncerKey]negtypes.StateCountMap),
syncerLabelProagationStats: make(map[negtypes.NegSyncerKey]LabelPropagationStats),
metricsInterval: exportInterval,
logger: logger.WithName("NegMetricsCollector"),
}
}

Expand All @@ -72,6 +72,8 @@ func FakeSyncerMetrics() *SyncerMetrics {
// RegisterSyncerMetrics registers the syncer-related collectors (sync result,
// syncer state, endpoint state and endpoint slice state) with the default
// Prometheus registry. MustRegister panics if any registration fails.
func RegisterSyncerMetrics() {
	// Collectors are registered in argument order, matching one call per collector.
	prometheus.MustRegister(
		syncerSyncResult,
		syncerSyncerState,
		syncerEndpointState,
		syncerEndpointSliceState,
	)
}

func (sm *SyncerMetrics) Run(stopCh <-chan struct{}) {
Expand All @@ -93,6 +95,14 @@ func (sm *SyncerMetrics) export() {
stateCount, syncerCount := sm.computeSyncerStateMetrics()
PublishSyncerStateMetrics(stateCount)

epStateCount, epsStateCount := sm.computeEndpointStateMetrics(false)
for state, count := range epStateCount {
syncerEndpointState.WithLabelValues(string(state)).Set(float64(count))
}
for state, count := range epsStateCount {
syncerEndpointSliceState.WithLabelValues(string(state)).Set(float64(count))
}

sm.logger.V(3).Info("Exporting syncer related metrics", "Syncer count", syncerCount, "Number of Endpoints", lpMetrics.NumberOfEndpoints)
}

Expand All @@ -114,19 +124,20 @@ func (sm *SyncerMetrics) UpdateSyncerStatusInMetrics(key negtypes.NegSyncerKey,
}

// UpdateSyncerEPMetrics stores the endpoint and endpoint slice state counts
// reported for the syncer identified by key, replacing any previous entry for
// that key. The maps are written while holding sm.mu; a nil destination map is
// allocated on the fly and the reinitialization is logged at verbosity 3.
func (sm *SyncerMetrics) UpdateSyncerEPMetrics(key negtypes.NegSyncerKey, endpointCount, endpointSliceCount negtypes.StateCountMap) {
sm.logger.V(3).Info("Updating syncer endpoint", "syncerKey", key)
sm.mu.Lock()
defer sm.mu.Unlock()
if sm.syncerEndpointStateMap == nil {
sm.syncerEndpointStateMap = make(map[negtypes.NegSyncerKey]negtypes.StateCountMap)
sm.logger.V(3).Info("Syncer Metrics failed to initialize correctly, reinitializing syncerEPStateMap")
sm.logger.V(3).Info("Syncer Metrics failed to initialize correctly, reinitializing syncerEndpointStateMap")
}
sm.syncerEndpointStateMap[key] = endpointCount

if sm.syncerEPSStateMap == nil {
sm.syncerEPSStateMap = make(map[negtypes.NegSyncerKey]negtypes.StateCountMap)
sm.logger.V(3).Info("Syncer Metrics failed to initialize correctly, reinitializing syncerEPSStateMap")
if sm.syncerEndpointSliceStateMap == nil {
sm.syncerEndpointSliceStateMap = make(map[negtypes.NegSyncerKey]negtypes.StateCountMap)
sm.logger.V(3).Info("Syncer Metrics failed to initialize correctly, reinitializing syncerEndpointSliceStateMap")
}
sm.syncerEPSStateMap[key] = endpointSliceCount
sm.syncerEndpointSliceStateMap[key] = endpointSliceCount
}

func (sm *SyncerMetrics) SetLabelPropagationStats(key negtypes.NegSyncerKey, labelstatLabelPropagationStats LabelPropagationStats) {
Expand Down Expand Up @@ -164,3 +175,24 @@ func (sm *SyncerMetrics) computeSyncerStateMetrics() (*syncerStateCount, int) {
}
return stateCount, syncerCount
}

// computeEndpointStateMetrics aggregates endpoint and endpoint slice state
// counts from all syncers.
//
// The first returned map sums the per-syncer endpoint counts and the second
// sums the per-syncer endpoint slice counts. Only the states returned by
// negtypes.StatesForEndpointMetrics() are aggregated. sm.mu is held for the
// duration of the call.
//
// forDegradedMode is accepted but not consulted by this implementation.
func (sm *SyncerMetrics) computeEndpointStateMetrics(forDegradedMode bool) (negtypes.StateCountMap, negtypes.StateCountMap) {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	// Fetch the reported states once instead of once per syncer.
	// NOTE(review): assumes StatesForEndpointMetrics returns the same set on
	// every call — confirm against its definition.
	states := negtypes.StatesForEndpointMetrics()

	epCounts := negtypes.StateCountMap{}
	epsCounts := negtypes.StateCountMap{}
	// Collect counts from each syncer.
	for _, epCount := range sm.syncerEndpointStateMap {
		for _, state := range states {
			epCounts[state] += epCount[state]
		}
	}
	for _, epsCount := range sm.syncerEndpointSliceStateMap {
		for _, state := range states {
			epsCounts[state] += epsCount[state]
		}
	}
	return epCounts, epsCounts
}
7 changes: 6 additions & 1 deletion pkg/neg/syncers/endpoints_calculator.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"k8s.io/client-go/tools/cache"
"k8s.io/ingress-gce/pkg/neg/metrics"
"k8s.io/ingress-gce/pkg/neg/types"
negtypes "k8s.io/ingress-gce/pkg/neg/types"
"k8s.io/ingress-gce/pkg/utils"
"k8s.io/klog/v2"
)
Expand Down Expand Up @@ -221,12 +222,16 @@ func (l *L7EndpointsCalculator) Mode() types.EndpointsCalculatorMode {
// CalculateEndpoints determines the endpoints in the NEGs based on the current service endpoints and the current NEGs.
//
// It returns the network endpoint sets and endpoint-to-pod map produced by
// toZoneNetworkEndpointMap, the count recorded under negtypes.Duplicate in the
// result's endpoint state counts, and the error from the calculation. The
// second argument is ignored.
func (l *L7EndpointsCalculator) CalculateEndpoints(eds []types.EndpointsData, _ map[string]types.NetworkEndpointSet) (map[string]types.NetworkEndpointSet, types.EndpointPodMap, int, error) {
	result, err := toZoneNetworkEndpointMap(eds, l.zoneGetter, l.podLister, l.servicePortName, l.networkEndpointType, l.enableDualStackNEG)
	// Publish state counts only when the calculation succeeded. If it ends up
	// in error, degraded mode is triggered and CalculateEndpointsDegradedMode
	// emits the metrics instead.
	if err == nil {
		l.syncMetricsCollector.UpdateSyncerEPMetrics(l.syncerKey, result.EPCount, result.EPSCount)
	}
	return result.NetworkEndpointSet, result.EndpointPodMap, result.EPCount[negtypes.Duplicate], err
}

// CalculateEndpointsDegradedMode determines the endpoints in the NEGs using
// the degraded-mode calculation (toZoneNetworkEndpointMapDegradedMode).
//
// It publishes the resulting endpoint and endpoint slice state counts for this
// syncer on every call, then returns the network endpoint sets and the
// endpoint-to-pod map. The returned error is always nil and the second
// argument is ignored.
func (l *L7EndpointsCalculator) CalculateEndpointsDegradedMode(eds []types.EndpointsData, _ map[string]types.NetworkEndpointSet) (map[string]types.NetworkEndpointSet, types.EndpointPodMap, error) {
	result := toZoneNetworkEndpointMapDegradedMode(eds, l.zoneGetter, l.podLister, l.nodeLister, l.serviceLister, l.servicePortName, l.networkEndpointType, l.enableDualStackNEG)
	l.syncMetricsCollector.UpdateSyncerEPMetrics(l.syncerKey, result.EPCount, result.EPSCount)
	return result.NetworkEndpointSet, result.EndpointPodMap, nil
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/neg/syncers/transaction_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2092,7 +2092,7 @@ func TestCollectLabelStats(t *testing.T) {

// newL4ILBTestTransactionSyncer builds a test transaction syncer for
// negtypes.VmIpEndpointType via newTestTransactionSyncer and replaces its
// endpoints calculator with the one GetEndpointsCalculator returns for the
// given mode. It returns both the NegSyncer and the underlying
// *transactionSyncer.
func newL4ILBTestTransactionSyncer(fakeGCE negtypes.NetworkEndpointGroupCloud, mode negtypes.EndpointsCalculatorMode) (negtypes.NegSyncer, *transactionSyncer) {
negsyncer, ts := newTestTransactionSyncer(fakeGCE, negtypes.VmIpEndpointType, false)
ts.endpointsCalculator = GetEndpointsCalculator(ts.podLister, ts.nodeLister, ts.serviceLister, ts.zoneGetter, ts.NegSyncerKey, mode, klog.TODO(), false, metrics.FakeSyncerMetrics())
ts.endpointsCalculator = GetEndpointsCalculator(ts.podLister, ts.nodeLister, ts.serviceLister, ts.zoneGetter, ts.NegSyncerKey, mode, klog.TODO(), false, nil)
return negsyncer, ts
}

Expand Down
Loading

0 comments on commit 65abd66

Please sign in to comment.