Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support/db: Add round_trip_time_seconds metric #4009

Merged
merged 12 commits into from
Oct 18, 2021
2 changes: 1 addition & 1 deletion services/horizon/internal/actions_effects_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func TestEffectActions_Index(t *testing.T) {
ht.ReapHistory(1)
w = ht.Get("/effects?order=desc&cursor=8589938689-1")
ht.Assert.Equal(410, w.Code)
ht.Logger.Error(w.Body.String())
ht.T.T.Log(w.Body.String())
})

t.Run("Effect resource props", func(t *testing.T) {
Expand Down
4 changes: 2 additions & 2 deletions services/horizon/internal/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,8 @@ func (a *App) init() error {
a.ctx, a.cancel = context.WithCancel(context.Background())

// log
log.DefaultLogger.Logger.Level = a.config.LogLevel
log.DefaultLogger.Logger.Hooks.Add(logmetrics.DefaultMetrics)
log.DefaultLogger.Logger.SetLevel(a.config.LogLevel)
log.DefaultLogger.Logger.AddHook(logmetrics.DefaultMetrics)

// sentry
initSentry(a)
Expand Down
44 changes: 17 additions & 27 deletions services/horizon/internal/test/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,13 @@
package test

import (
"bytes"
"context"
"net/http"
"net/http/httptest"
"testing"

"github.com/jmoiron/sqlx"
"github.com/sirupsen/logrus"
"github.com/stellar/go/services/horizon/internal/logmetrics"
tdb "github.com/stellar/go/services/horizon/internal/test/db"
"github.com/stellar/go/support/log"
"github.com/stretchr/testify/assert"
Expand All @@ -27,15 +25,12 @@ type StaticMockServer struct {

// T provides a common set of functionality for each test in horizon
type T struct {
T *testing.T
Assert *assert.Assertions
Require *require.Assertions
Ctx context.Context
HorizonDB *sqlx.DB
CoreDB *sqlx.DB
Logger *log.Entry
LogMetrics *logmetrics.Metrics
LogBuffer *bytes.Buffer
T *testing.T
Assert *assert.Assertions
Require *require.Assertions
Ctx context.Context
HorizonDB *sqlx.DB
CoreDB *sqlx.DB
}

// Context provides a context suitable for testing in tests that do not create
Expand Down Expand Up @@ -63,25 +58,25 @@ func DatabaseURL() string {
// OverrideLogger sets the default logger used by horizon to `l`. This is used
// by the testing system so that we can collect output from logs during test
// runs. Panics if the logger is already overridden.
func OverrideLogger(l *log.Entry) {
if oldDefault != nil {
func OverrideLogger() {
if endLogTest != nil {
panic("logger already overridden")
}

oldDefault = log.DefaultLogger
log.DefaultLogger = l
endLogTest = log.StartTest(log.DebugLevel)
}

// RestoreLogger restores the default horizon logger after it is overridden
// using a call to `OverrideLogger`. Panics if the default logger is not
// presently overridden.
func RestoreLogger() {
if oldDefault == nil {
func RestoreLogger() []logrus.Entry {
if endLogTest == nil {
panic("logger not overridden, cannot restore")
}

log.DefaultLogger = oldDefault
oldDefault = nil
entries := endLogTest()
endLogTest = nil
return entries
}

// Start initializes a new test helper object and conceptually "starts" a new
Expand All @@ -90,15 +85,10 @@ func Start(t *testing.T) *T {
result := &T{}

result.T = t
result.LogBuffer = new(bytes.Buffer)
result.Logger, result.LogMetrics = logmetrics.New()
result.Logger.Logger.Out = result.LogBuffer
result.Logger.Logger.Formatter.(*logrus.TextFormatter).DisableColors = true
result.Logger.Logger.Level = logrus.DebugLevel

OverrideLogger(result.Logger)
OverrideLogger()

result.Ctx = log.Set(context.Background(), result.Logger)
result.Ctx = log.Set(context.Background(), log.DefaultLogger)
result.HorizonDB = Database(t)
result.CoreDB = StellarCoreDatabase(t)
result.Assert = assert.New(t)
Expand All @@ -122,4 +112,4 @@ func StellarCoreDatabaseURL() string {
return tdb.StellarCoreURL()
}

var oldDefault *log.Entry = nil
var endLogTest func() []logrus.Entry = nil
11 changes: 8 additions & 3 deletions services/horizon/internal/test/t.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,16 @@ func (t *T) CoreSession() *db.Session {
// Finish finishes the test, logging any accumulated horizon logs to the logs
// output
func (t *T) Finish() {
RestoreLogger()
logEntries := RestoreLogger()
operationfeestats.ResetState()

if t.LogBuffer.Len() > 0 {
t.T.Log("\n" + t.LogBuffer.String())
for _, entry := range logEntries {
logString, err := entry.String()
if err != nil {
t.T.Logf("Error from entry.String: %v", err)
} else {
t.T.Log(logString)
}
}
}

Expand Down
28 changes: 27 additions & 1 deletion support/db/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func contextRoute(ctx context.Context) string {

type SessionWithMetrics struct {
SessionInterface

registry *prometheus.Registry
queryCounter *prometheus.CounterVec
queryDurationSummary *prometheus.SummaryVec
Expand All @@ -54,6 +55,8 @@ type SessionWithMetrics struct {
maxIdleClosedCounter prometheus.CounterFunc
maxIdleTimeClosedCounter prometheus.CounterFunc
maxLifetimeClosedCounter prometheus.CounterFunc
roundTripProbe *roundTripProbe
roundTripTimeSummary prometheus.Summary
}

func RegisterMetrics(base *Session, namespace string, sub Subservice, registry *prometheus.Registry) SessionInterface {
Expand Down Expand Up @@ -221,10 +224,28 @@ func RegisterMetrics(base *Session, namespace string, sub Subservice, registry *
)
registry.MustRegister(s.maxLifetimeClosedCounter)

s.roundTripTimeSummary = prometheus.NewSummary(
prometheus.SummaryOpts{
Namespace: namespace,
Subsystem: "db",
Name: "round_trip_time_seconds",
Help: "time required to run `select 1` query in a DB - effectively measures round trip time",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
)
registry.MustRegister(s.roundTripTimeSummary)

s.roundTripProbe = &roundTripProbe{
session: base.Clone(),
roundTripTimeSummary: s.roundTripTimeSummary,
}
s.roundTripProbe.start()
return s
}

func (s *SessionWithMetrics) Close() error {
s.roundTripProbe.close()

s.registry.Unregister(s.queryCounter)
s.registry.Unregister(s.queryDurationSummary)
// s.registry.Unregister(s.txnCounter)
Expand Down Expand Up @@ -270,7 +291,12 @@ func (s *SessionWithMetrics) TruncateTables(ctx context.Context, tables []string

func (s *SessionWithMetrics) Clone() SessionInterface {
return &SessionWithMetrics{
SessionInterface: s.SessionInterface.Clone(),
SessionInterface: s.SessionInterface.Clone(),

// Note that clonned Session will point at the same roundTripProbe
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: clonned -> cloned

// to avoid starting multiple go routines.
roundTripProbe: s.roundTripProbe,

registry: s.registry,
queryCounter: s.queryCounter,
queryDurationSummary: s.queryDurationSummary,
Expand Down
47 changes: 47 additions & 0 deletions support/db/round_trip_probe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package db

import (
"context"
"sync"
"time"

"github.com/prometheus/client_golang/prometheus"
)

type roundTripProbe struct {
session SessionInterface
roundTripTimeSummary prometheus.Summary

closeChan chan struct{}
closeOnce sync.Once
}

func (p *roundTripProbe) start() {
p.closeChan = make(chan struct{})
// session must be cloned because will be used concurrently in a
// separate go routine in roundTripProbe
p.session = p.session.Clone()
bartekn marked this conversation as resolved.
Show resolved Hide resolved

go func() {
for {
select {
case <-time.After(time.Second):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 This is probably not a significant concern, but this will create a new channel on every call. For a repeating timer I typically use time.Ticker. You create it with the duration, and it gives you a single chan that'll have an event to read every duration period that passes.

The one difference is that the way you're using time.After here will result in ~1 second of time between the end of the last probe and beginning of the next, but if you use Ticker it'll result in ~1 second of time between the start of each probe.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call!

ctx, cancel := context.WithTimeout(context.Background(), time.Second)
startTime := time.Now()
_, err := p.session.ExecRaw(ctx, "select 1")
if err == nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we check for context deadline exceeded errors? If the select is taking longer than 1 second we would be blind to this if we're looking just at prometheus. maybe we should still call p.roundTripTimeSummary.Observe() on deadline exceeded errors?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call. Added Observe(1) in case of errors and also updated the metric comment to inform that this is the max value.

p.roundTripTimeSummary.Observe(time.Since(startTime).Seconds())
}
cancel()
case <-p.closeChan:
return
}
}
}()
}

func (p *roundTripProbe) close() {
p.closeOnce.Do(func() {
close(p.closeChan)
})
}
10 changes: 5 additions & 5 deletions support/log/entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,17 +142,17 @@ func (e *Entry) StartTest(level logrus.Level) func() []logrus.Entry {
e.isTesting = true

hook := &test.Hook{}
e.Logger.Hooks.Add(hook)
bartekn marked this conversation as resolved.
Show resolved Hide resolved
e.Logger.AddHook(hook)

old := e.Logger.Out
e.Logger.Out = ioutil.Discard

oldLevel := e.Logger.Level
e.Logger.Level = level
oldLevel := e.Logger.GetLevel()
e.Logger.SetLevel(level)

return func() []logrus.Entry {
e.Logger.Level = oldLevel
e.Logger.Out = old
e.Logger.SetLevel(oldLevel)
e.Logger.SetOutput(old)
e.removeHook(hook)
e.isTesting = false
return hook.Entries
Expand Down