Skip to content

Commit

Permalink
Add new flag for configuring metrics export frequency
Browse files Browse the repository at this point in the history
We need to increase the frequency to at least 1/5minutes, but this may not be the final value.
From observability POV the more frequent the better.
With this change we can tune the frequency without rebuilding the binary, and without releasing new tags.

This will allow us to increase export frequency to comply with sampling theorem
https://en.wikipedia.org/wiki/Nyquist%E2%80%93Shannon_sampling_theorem
  • Loading branch information
cezarygerard committed Apr 8, 2022
1 parent 0746d1b commit 24156b8
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 15 deletions.
3 changes: 2 additions & 1 deletion pkg/context/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ func NewControllerContext(
clusterNamer *namer.Namer,
kubeSystemUID types.UID,
config ControllerContextConfig) *ControllerContext {

context := &ControllerContext{
KubeConfig: kubeConfig,
KubeClient: kubeClient,
Expand All @@ -157,7 +158,7 @@ func NewControllerContext(
ClusterNamer: clusterNamer,
L4Namer: namer.NewL4Namer(string(kubeSystemUID), clusterNamer),
KubeSystemUID: kubeSystemUID,
ControllerMetrics: metrics.NewControllerMetrics(),
ControllerMetrics: metrics.NewControllerMetrics(flags.F.MetricsExportInterval),
ControllerContextConfig: config,
IngressInformer: informernetworking.NewIngressInformer(kubeClient, config.Namespace, config.ResyncPeriod, utils.NewNamespaceIndexer()),
ServiceInformer: informerv1.NewServiceInformer(kubeClient, config.Namespace, config.ResyncPeriod, utils.NewNamespaceIndexer()),
Expand Down
2 changes: 2 additions & 0 deletions pkg/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ var (
Version bool
WatchNamespace string
LeaderElection LeaderElectionConfiguration
MetricsExportInterval time.Duration

// Feature flags should be named Enablexxx.
EnableASMConfigMapBasedConfig bool
Expand Down Expand Up @@ -245,6 +246,7 @@ L7 load balancing. CSV values accepted. Example: -node-port-ranges=80,8080,400-5
flag.BoolVar(&F.EnableEndpointSlices, "enable-endpoint-slices", false, "Enable using Endpoint Slices API instead of Endpoints API")
flag.BoolVar(&F.EnableMultipleIgs, "enable-multiple-igs", false, "Enable using unmanaged instance group management")
flag.IntVar(&F.MaxIgSize, "max-ig-size", 1000, "Max number of instances in Instance Group")
flag.DurationVar(&F.MetricsExportInterval, "metrics-export-interval", 10*time.Minute, `Period for calculating and exporting metrics related to state of managed objects.`)
}

type RateLimitSpecs struct {
Expand Down
4 changes: 2 additions & 2 deletions pkg/metrics/l4metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ func TestComputeL4ILBMetrics(t *testing.T) {
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()
newMetrics := NewControllerMetrics()
newMetrics := FakeControllerMetrics()
for i, serviceState := range tc.serviceStates {
newMetrics.SetL4ILBService(fmt.Sprint(i), serviceState)
}
Expand Down Expand Up @@ -307,7 +307,7 @@ func TestComputeL4NetLBMetrics(t *testing.T) {
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()
newMetrics := NewControllerMetrics()
newMetrics := FakeControllerMetrics()
for i, serviceState := range tc.serviceStates {
newMetrics.SetL4NetLBService(fmt.Sprint(i), serviceState)
}
Expand Down
19 changes: 13 additions & 6 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ const (
)

var (
metricsInterval = 10 * time.Minute
ingressCount = prometheus.NewGaugeVec(
ingressCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "number_of_ingresses",
Help: "Number of Ingresses",
Expand Down Expand Up @@ -142,20 +141,28 @@ type ControllerMetrics struct {
serviceMap map[string]struct{}
//TODO(kl52752) remove mutex and change map to sync.map
sync.Mutex
// duration between metrics exports
metricsInterval time.Duration
}

// NewControllerMetrics initializes ControllerMetrics and starts a go routine to compute and export metrics periodically.
func NewControllerMetrics() *ControllerMetrics {
func NewControllerMetrics(exportInterval time.Duration) *ControllerMetrics {
return &ControllerMetrics{
ingressMap: make(map[string]IngressState),
negMap: make(map[string]NegServiceState),
l4ILBServiceMap: make(map[string]L4ILBServiceState),
l4NetLBServiceMap: make(map[string]L4NetLBServiceState),
pscMap: make(map[string]pscmetrics.PSCState),
serviceMap: make(map[string]struct{}),
metricsInterval: exportInterval,
}
}

// FakeControllerMetrics creates new ControllerMetrics with fixed 10 minutes metricsInterval, to be used in tests
func FakeControllerMetrics() *ControllerMetrics {
return NewControllerMetrics(10 * time.Minute)
}

// servicePortKey defines a service port uniquely.
// Note that same service port combination used by ILB and XLB are treated as separate service ports.
type servicePortKey struct {
Expand All @@ -175,12 +182,12 @@ func (spk servicePortKey) string() string {
}

func (im *ControllerMetrics) Run(stopCh <-chan struct{}) {
klog.V(3).Infof("Ingress Metrics initialized. Metrics will be exported at an interval of %v", metricsInterval)
klog.V(3).Infof("Ingress Metrics initialized. Metrics will be exported at an interval of %v", im.metricsInterval)
// Compute and export metrics periodically.
go func() {
// Wait for ingress states to be populated in the cache before computing metrics.
time.Sleep(metricsInterval)
wait.Until(im.export, metricsInterval, stopCh)
time.Sleep(im.metricsInterval)
wait.Until(im.export, im.metricsInterval, stopCh)
}()
<-stopCh
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1004,7 +1004,7 @@ func TestComputeIngressMetrics(t *testing.T) {
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()
newMetrics := NewControllerMetrics()
newMetrics := FakeControllerMetrics()
for _, ingState := range tc.ingressStates {
ingKey := fmt.Sprintf("%s/%s", defaultNamespace, ingState.ingress.Name)
newMetrics.SetIngress(ingKey, ingState)
Expand Down Expand Up @@ -1122,7 +1122,7 @@ func TestComputeNegMetrics(t *testing.T) {
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()
newMetrics := NewControllerMetrics()
newMetrics := FakeControllerMetrics()
for i, negState := range tc.negStates {
newMetrics.SetNegService(fmt.Sprint(i), negState)
}
Expand Down Expand Up @@ -1222,7 +1222,7 @@ func TestComputePSCMetrics(t *testing.T) {
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()
newMetrics := NewControllerMetrics()
newMetrics := FakeControllerMetrics()
for i, serviceState := range tc.saStates {
newMetrics.SetServiceAttachment(strconv.Itoa(i), serviceState)
}
Expand Down Expand Up @@ -1284,7 +1284,7 @@ func TestComputeServiceMetrics(t *testing.T) {
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()
newMetrics := NewControllerMetrics()
newMetrics := FakeControllerMetrics()
for _, service := range tc.services {
newMetrics.SetService(service)
}
Expand Down
5 changes: 3 additions & 2 deletions pkg/neg/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ const (
)

var (
defaultBackend = utils.ServicePort{
metricsInterval = 10 * time.Minute
defaultBackend = utils.ServicePort{
ID: utils.ServicePortID{
Service: types.NamespacedName{
Name: "default-http-backend",
Expand Down Expand Up @@ -135,7 +136,7 @@ func newTestControllerWithParamsAndContext(kubeClient kubernetes.Interface, test
drDynamicInformer.Informer(),
testContext.SvcNegInformer,
func() bool { return true },
metrics.NewControllerMetrics(),
metrics.FakeControllerMetrics(),
testContext.L4Namer,
defaultBackend,
negtypes.NewAdapter(testContext.Cloud),
Expand Down

0 comments on commit 24156b8

Please sign in to comment.