From 86ad67cfbdece5a81154382c94296c3046063b4f Mon Sep 17 00:00:00 2001 From: Jian Xiao Date: Fri, 20 Sep 2024 00:27:12 +0000 Subject: [PATCH 1/2] Reduce the operator latency metric cardinality --- disperser/batcher/metrics.go | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/disperser/batcher/metrics.go b/disperser/batcher/metrics.go index 0a71829278..8fc50370aa 100644 --- a/disperser/batcher/metrics.go +++ b/disperser/batcher/metrics.go @@ -51,7 +51,8 @@ type FinalizerMetrics struct { } type DispatcherMetrics struct { - Latency *prometheus.SummaryVec + Latency *prometheus.SummaryVec + OperatorLatency *prometheus.GaugeVec } type Metrics struct { @@ -178,6 +179,14 @@ func NewMetrics(httpPort string, logger logging.Logger) *Metrics { }, []string{"operator_id", "status"}, ), + OperatorLatency: promauto.With(reg).NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: "operator_latency", + Help: "the attestation latency observed for operators", + }, + []string{"operator_id"}, + ), } metrics := &Metrics{ @@ -288,7 +297,14 @@ func (t *DispatcherMetrics) ObserveLatency(operatorId string, success bool, late if !success { label = "failure" } - t.Latency.WithLabelValues(operatorId, label).Observe(latencyMS) + // The Latency metric has an "operator_id" label, but we leave it empty because it's separately + // tracked in OperatorLatency. + t.Latency.WithLabelValues("", label).Observe(latencyMS) + // Only tracks successful requests, so there is one stream per operator. + // This is sufficient to provide insight into operators' performance. + if success { + t.OperatorLatency.WithLabelValues(operatorId).Set(latencyMS) + } } // UpdateCompletedBlob increments the number and updates size of processed blobs. 
From 950828750c693e7a583dcd9c3a0aa81c0c848493 Mon Sep 17 00:00:00 2001 From: Jian Xiao Date: Fri, 20 Sep 2024 01:34:44 +0000 Subject: [PATCH 2/2] Rename operator latency gauge to operator_attestation_latency_ms --- disperser/batcher/metrics.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/disperser/batcher/metrics.go b/disperser/batcher/metrics.go index 8fc50370aa..8ccabcb004 100644 --- a/disperser/batcher/metrics.go +++ b/disperser/batcher/metrics.go @@ -182,8 +182,8 @@ func NewMetrics(httpPort string, logger logging.Logger) *Metrics { OperatorLatency: promauto.With(reg).NewGaugeVec( prometheus.GaugeOpts{ Namespace: namespace, - Name: "operator_latency", - Help: "the attestation latency observed for operators", + Name: "operator_attestation_latency_ms", + Help: "attestation latency in ms observed for operators", }, []string{"operator_id"}, ),