diff --git a/support/db/metrics.go b/support/db/metrics.go index 5b322e80c8..5e893f026e 100644 --- a/support/db/metrics.go +++ b/support/db/metrics.go @@ -42,6 +42,7 @@ func contextRoute(ctx context.Context) string { type SessionWithMetrics struct { SessionInterface + registry *prometheus.Registry queryCounter *prometheus.CounterVec queryDurationSummary *prometheus.SummaryVec @@ -54,6 +55,8 @@ type SessionWithMetrics struct { maxIdleClosedCounter prometheus.CounterFunc maxIdleTimeClosedCounter prometheus.CounterFunc maxLifetimeClosedCounter prometheus.CounterFunc + roundTripProbe *roundTripProbe + roundTripTimeSummary prometheus.Summary } func RegisterMetrics(base *Session, namespace string, sub Subservice, registry *prometheus.Registry) SessionInterface { @@ -221,10 +224,28 @@ func RegisterMetrics(base *Session, namespace string, sub Subservice, registry * ) registry.MustRegister(s.maxLifetimeClosedCounter) + s.roundTripTimeSummary = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: "db", + Name: "round_trip_time_seconds", + Help: "time required to run `select 1` query in a DB - effectively measures round trip time, if time exceeds 1s it will be recorded as 1", + ConstLabels: prometheus.Labels{"subservice": string(sub)}, + }, + ) + registry.MustRegister(s.roundTripTimeSummary) + + s.roundTripProbe = &roundTripProbe{ + session: base, + roundTripTimeSummary: s.roundTripTimeSummary, + } + s.roundTripProbe.start() return s } func (s *SessionWithMetrics) Close() error { + s.roundTripProbe.close() + s.registry.Unregister(s.queryCounter) s.registry.Unregister(s.queryDurationSummary) // s.registry.Unregister(s.txnCounter) @@ -270,7 +291,12 @@ func (s *SessionWithMetrics) TruncateTables(ctx context.Context, tables []string func (s *SessionWithMetrics) Clone() SessionInterface { return &SessionWithMetrics{ - SessionInterface: s.SessionInterface.Clone(), + SessionInterface: s.SessionInterface.Clone(), + + // Note that cloned Session will 
point at the same roundTripProbe + // to avoid starting multiple goroutines. + roundTripProbe: s.roundTripProbe, + registry: s.registry, queryCounter: s.queryCounter, queryDurationSummary: s.queryDurationSummary, diff --git a/support/db/round_trip_probe.go b/support/db/round_trip_probe.go new file mode 100644 index 0000000000..180702bd2e --- /dev/null +++ b/support/db/round_trip_probe.go @@ -0,0 +1,51 @@ +package db + +import ( + "context" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +type roundTripProbe struct { + session SessionInterface + roundTripTimeSummary prometheus.Summary + + closeChan chan struct{} + closeOnce sync.Once +} + +func (p *roundTripProbe) start() { + p.closeChan = make(chan struct{}) + // session must be cloned because it will be used concurrently in a + // separate goroutine in roundTripProbe + p.session = p.session.Clone() + ticker := time.NewTicker(time.Second) + + go func() { + for { + select { + case <-ticker.C: + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + startTime := time.Now() + _, err := p.session.ExecRaw(ctx, "select 1") + duration := time.Since(startTime).Seconds() + if err != nil { + duration = 1 + } + p.roundTripTimeSummary.Observe(duration) + cancel() + case <-p.closeChan: + ticker.Stop() + return + } + } + }() +} + +func (p *roundTripProbe) close() { + p.closeOnce.Do(func() { + close(p.closeChan) + }) +}