Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFQ Relayer: add request status count metric #2856

Merged
merged 19 commits into from
Jul 9, 2024
Merged
32 changes: 32 additions & 0 deletions services/rfq/relayer/reldb/base/quote.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,35 @@
}
return status >= prevStatus
}

type statusCount struct {
Status int
Count int64
}

// GetStatusCounts gets the counts of quote requests by status.
func (s Store) GetStatusCounts(ctx context.Context, matchStatuses ...reldb.QuoteRequestStatus) (map[reldb.QuoteRequestStatus]int, error) {
inArgs := make([]int, len(matchStatuses))
for i := range matchStatuses {
inArgs[i] = int(matchStatuses[i].Int())
}

Check warning on line 152 in services/rfq/relayer/reldb/base/quote.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/reldb/base/quote.go#L148-L152

Added lines #L148 - L152 were not covered by tests

var results []statusCount
tx := s.DB().
WithContext(ctx).
Model(&RequestForQuote{}).
Select(fmt.Sprintf("%s, COUNT(*) as count", statusFieldName)).
Where(fmt.Sprintf("%s IN ?", statusFieldName), inArgs).
Group(statusFieldName).
Scan(&results)
if tx.Error != nil {
return nil, fmt.Errorf("could not get db results: %w", tx.Error)
}

Check warning on line 164 in services/rfq/relayer/reldb/base/quote.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/reldb/base/quote.go#L154-L164

Added lines #L154 - L164 were not covered by tests

statuses := make(map[reldb.QuoteRequestStatus]int)
for _, result := range results {
statuses[reldb.QuoteRequestStatus(result.Status)] = int(result.Count)
}

Check warning on line 169 in services/rfq/relayer/reldb/base/quote.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/reldb/base/quote.go#L166-L169

Added lines #L166 - L169 were not covered by tests

return statuses, nil

Check warning on line 171 in services/rfq/relayer/reldb/base/quote.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/reldb/base/quote.go#L171

Added line #L171 was not covered by tests
trajan0x marked this conversation as resolved.
Show resolved Hide resolved
}
2 changes: 2 additions & 0 deletions services/rfq/relayer/reldb/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ type Reader interface {
GetRebalanceByID(ctx context.Context, rebalanceID string) (*Rebalance, error)
// GetDBStats gets the database stats.
GetDBStats(ctx context.Context) (*sql.DBStats, error)
// GetStatusCounts gets the counts of quote requests by status.
GetStatusCounts(ctx context.Context, matchStatuses ...QuoteRequestStatus) (map[QuoteRequestStatus]int, error)
}

// Service is the interface for the database service.
Expand Down
82 changes: 82 additions & 0 deletions services/rfq/relayer/service/otel.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package service

import (
"context"
"fmt"

"github.com/cornelk/hashmap"
"github.com/synapsecns/sanguine/core/metrics"
"github.com/synapsecns/sanguine/ethergo/signer/signer"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)

const meterName = "github.com/synapsecns/sanguine/services/rfq/relayer/service"

// generate an interface for otelRecorder that exports the public method.
// this allows us to avoid using recordX externally anad makes the package less confusing.
//
// =============================================================================
// =============================================================================
// IMPORTANT: DO NOT REMOVE THIS COMMENT.
// NOTICE: PLEASE MAKE SURE YOU UPDATE BOTH THE DOCS AND THE GRAFANA DASHBOARD (IF NEEDED) AFTER UPDATING METRICS.
// =============================================================================
// =============================================================================
//
//go:generate go run github.com/vburenin/ifacemaker -f otel.go -s otelRecorder -i iOtelRecorder -p service -o otel_generated.go -c "autogenerated file"
type otelRecorder struct {
metrics metrics.Handler
// meter is the metrics meter.
meter metric.Meter
// statusCountGauge is the gauge for the status.
statusCountGauge metric.Int64ObservableGauge
// statusCounts is used for metrics.
// status -> count
statusCounts *hashmap.Map[int, int]
// signer is the signer for signing transactions.
signer signer.Signer
}

func newOtelRecorder(meterHandler metrics.Handler, signer signer.Signer) (_ iOtelRecorder, err error) {
or := otelRecorder{
metrics: meterHandler,
meter: meterHandler.Meter(meterName),
statusCounts: hashmap.New[int, int](),
signer: signer,
}

or.statusCountGauge, err = or.meter.Int64ObservableGauge("status_count")
trajan0x marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, fmt.Errorf("could not create last block gauge")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📚 spelling: Typo in error message: should be 'could not create status count gauge'

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📚 spelling: Typo in error message: should be 'could not create status count gauge'

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📚 spelling: Typo in error message: should be 'could not create status count gauge'

}

Check warning on line 51 in services/rfq/relayer/service/otel.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/service/otel.go#L40-L51

Added lines #L40 - L51 were not covered by tests

_, err = or.meter.RegisterCallback(or.recordStatusCounts, or.statusCountGauge)
if err != nil {
return nil, fmt.Errorf("could not register callback for status gauge")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📚 spelling: Typo in error message: should be 'could not register callback for status count gauge'

Suggested change
return nil, fmt.Errorf("could not register callback for status gauge")
return nil, fmt.Errorf("could not register callback for status count gauge")

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📚 spelling: Typo in error message: should be 'could not register callback for status count gauge'

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📚 spelling: Typo in error message: should be 'could not register callback for status count gauge'

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📚 spelling: Typo in error message: should be 'could not register callback for status count gauge'

}

Check warning on line 56 in services/rfq/relayer/service/otel.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/service/otel.go#L53-L56

Added lines #L53 - L56 were not covered by tests

return &or, nil

Check warning on line 58 in services/rfq/relayer/service/otel.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/service/otel.go#L58

Added line #L58 was not covered by tests
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improve error messages for better debugging.

The error messages in the newOtelRecorder method could be more specific to aid in debugging.

-	return nil, fmt.Errorf("could not create last block gauge")
+	return nil, fmt.Errorf("could not create status count gauge: %w", err)

-	return nil, fmt.Errorf("could not register callback for status gauge")
+	return nil, fmt.Errorf("could not register callback for status count gauge: %w", err)
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
func newOtelRecorder(meterHandler metrics.Handler, signer signer.Signer) (_ iOtelRecorder, err error) {
or := otelRecorder{
metrics: meterHandler,
meter: meterHandler.Meter(meterName),
statusCounts: hashmap.New[int, int](),
signer: signer,
}
or.statusCountGauge, err = or.meter.Int64ObservableGauge("status_count")
if err != nil {
return nil, fmt.Errorf("could not create last block gauge")
}
_, err = or.meter.RegisterCallback(or.recordStatusCounts, or.statusCountGauge)
if err != nil {
return nil, fmt.Errorf("could not register callback for status gauge")
}
return &or, nil
func newOtelRecorder(meterHandler metrics.Handler, signer signer.Signer) (_ iOtelRecorder, err error) {
or := otelRecorder{
metrics: meterHandler,
meter: meterHandler.Meter(meterName),
statusCounts: hashmap.New[int, int](),
signer: signer,
}
or.statusCountGauge, err = or.meter.Int64ObservableGauge("status_count")
if err != nil {
return nil, fmt.Errorf("could not create status count gauge: %w", err)
}
_, err = or.meter.RegisterCallback(or.recordStatusCounts, or.statusCountGauge)
if err != nil {
return nil, fmt.Errorf("could not register callback for status count gauge: %w", err)
}
return &or, nil

}

func (o *otelRecorder) recordStatusCounts(_ context.Context, observer metric.Observer) (err error) {
if o.metrics == nil || o.statusCountGauge == nil || o.statusCounts == nil {
return nil
Comment on lines +61 to +63
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🪶 style: Consider logging an error or warning if any of these conditions are true.

}

Check warning on line 64 in services/rfq/relayer/service/otel.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/service/otel.go#L61-L64

Added lines #L61 - L64 were not covered by tests

o.statusCounts.Range(func(status int, count int) bool {
opts := metric.WithAttributes(
attribute.Int("status", status),
attribute.String("wallet", o.signer.Address().Hex()),
)
observer.ObserveInt64(o.statusCountGauge, int64(count), opts)

return true
})

Check warning on line 74 in services/rfq/relayer/service/otel.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/service/otel.go#L66-L74

Added lines #L66 - L74 were not covered by tests

return nil

Check warning on line 76 in services/rfq/relayer/service/otel.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/service/otel.go#L76

Added line #L76 was not covered by tests
}

// RecordStatusCounts records the request status count.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📚 spelling: Typo in method name: should be 'RecordStatusCount'

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📚 spelling: Typo in method name: should be 'RecordStatusCount'

func (o *otelRecorder) RecordStatusCount(status, count int) {
o.statusCounts.Set(status, count)

Check warning on line 81 in services/rfq/relayer/service/otel.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/service/otel.go#L80-L81

Added lines #L80 - L81 were not covered by tests
}
9 changes: 9 additions & 0 deletions services/rfq/relayer/service/otel_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 35 additions & 1 deletion services/rfq/relayer/service/relayer.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@
// semaphore is used to limit the number of concurrent requests
semaphore *semaphore.Weighted
// handlerMtx is used to synchronize handling of relay requests
handlerMtx mapmutex.StringMapMutex
handlerMtx mapmutex.StringMapMutex
otelRecorder iOtelRecorder
}

var logger = log.Logger("relayer")
Expand Down Expand Up @@ -142,6 +143,11 @@
return nil, fmt.Errorf("could not get api server: %w", err)
}

otelRecorder, err := newOtelRecorder(metricHandler, sg)
if err != nil {
return nil, fmt.Errorf("could not get otel recorder: %w", err)
}

Check warning on line 149 in services/rfq/relayer/service/relayer.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/service/relayer.go#L146-L149

Added lines #L146 - L149 were not covered by tests
Comment on lines +146 to +149
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improve error messages and add tests.

The error messages could be more specific to aid in debugging. Additionally, these lines are not covered by tests.

-	return nil, fmt.Errorf("could not get otel recorder: %w", err)
+	return nil, fmt.Errorf("could not initialize otel recorder: %w", err)

Do you want me to generate the unit testing code or open a GitHub issue to track this task?

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
otelRecorder, err := newOtelRecorder(metricHandler, sg)
if err != nil {
return nil, fmt.Errorf("could not get otel recorder: %w", err)
}
otelRecorder, err := newOtelRecorder(metricHandler, sg)
if err != nil {
return nil, fmt.Errorf("could not initialize otel recorder: %w", err)
}
Tools
GitHub Check: codecov/patch

[warning] 146-149: services/rfq/relayer/service/relayer.go#L146-L149
Added lines #L146 - L149 were not covered by tests


cache := ttlcache.New[common.Hash, bool](ttlcache.WithTTL[common.Hash, bool](time.Second * 30))
rel := Relayer{
db: store,
Expand All @@ -159,6 +165,7 @@
apiClient: apiClient,
semaphore: semaphore.NewWeighted(maxConcurrentRequests),
handlerMtx: mapmutex.NewStringMapMutex(),
otelRecorder: otelRecorder,

Check warning on line 168 in services/rfq/relayer/service/relayer.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/service/relayer.go#L168

Added line #L168 was not covered by tests
}
return &rel, nil
}
Expand Down Expand Up @@ -280,6 +287,14 @@
return nil
})

g.Go(func() error {
err = r.recordMetrics(ctx)
if err != nil {
return fmt.Errorf("could not record metrics: %w", err)
}
return nil

Check warning on line 295 in services/rfq/relayer/service/relayer.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/service/relayer.go#L290-L295

Added lines #L290 - L295 were not covered by tests
Comment on lines +290 to +295
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improve error messages and add tests.

The error messages could be more specific to aid in debugging. Additionally, these lines are not covered by tests.

-	return fmt.Errorf("could not record metrics: %w", err)
+	return fmt.Errorf("error recording metrics: %w", err)

Do you want me to generate the unit testing code or open a GitHub issue to track this task?

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
g.Go(func() error {
err = r.recordMetrics(ctx)
if err != nil {
return fmt.Errorf("could not record metrics: %w", err)
}
return nil
g.Go(func() error {
err = r.recordMetrics(ctx)
if err != nil {
return fmt.Errorf("error recording metrics: %w", err)
}
return nil
Tools
GitHub Check: codecov/patch

[warning] 290-295: services/rfq/relayer/service/relayer.go#L290-L295
Added lines #L290 - L295 were not covered by tests

})

err = g.Wait()
if err != nil {
return fmt.Errorf("could not start: %w", err)
Expand Down Expand Up @@ -360,6 +375,25 @@
return nil
}

const defaultMetricsInterval = 10

func (r *Relayer) recordMetrics(ctx context.Context) (err error) {
for {
select {
case <-ctx.Done():
return fmt.Errorf("could not record metrics: %w", ctx.Err())
case <-time.After(defaultMetricsInterval * time.Second):
statusCounts, err := r.db.GetStatusCounts(ctx)
if err != nil {
return fmt.Errorf("could not get status counts: %w", err)
}
for status, count := range statusCounts {
r.otelRecorder.RecordStatusCount(int(status.Int()), count)
}

Check warning on line 392 in services/rfq/relayer/service/relayer.go

View check run for this annotation

Codecov / codecov/patch

services/rfq/relayer/service/relayer.go#L380-L392

Added lines #L380 - L392 were not covered by tests
}
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improve error messages for better debugging.

The error messages in the recordMetrics method could be more specific to aid in debugging.

-	return fmt.Errorf("could not record metrics: %w", ctx.Err())
+	return fmt.Errorf("context done while recording metrics: %w", ctx.Err())

-	return fmt.Errorf("could not get status counts: %w", err)
+	return fmt.Errorf("could not get status counts from database: %w", err)
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
func (r *Relayer) recordMetrics(ctx context.Context) (err error) {
for {
select {
case <-ctx.Done():
return fmt.Errorf("could not record metrics: %w", ctx.Err())
case <-time.After(defaultMetricsInterval * time.Second):
statusCounts, err := r.db.GetStatusCounts(ctx)
if err != nil {
return fmt.Errorf("could not get status counts: %w", err)
}
for status, count := range statusCounts {
r.otelRecorder.RecordStatusCount(int(status.Int()), count)
}
}
}
}
func (r *Relayer) recordMetrics(ctx context.Context) (err error) {
for {
select {
case <-ctx.Done():
return fmt.Errorf("context done while recording metrics: %w", ctx.Err())
case <-time.After(defaultMetricsInterval * time.Second):
statusCounts, err := r.db.GetStatusCounts(ctx)
if err != nil {
return fmt.Errorf("could not get status counts from database: %w", err)
}
for status, count := range statusCounts {
r.otelRecorder.RecordStatusCount(int(status.Int()), count)
}
}
}
}


func (r *Relayer) processDB(ctx context.Context, serial bool, matchStatuses ...reldb.QuoteRequestStatus) (err error) {
ctx, span := r.metrics.Tracer().Start(ctx, "processDB", trace.WithAttributes(
attribute.Bool("serial", serial),
Expand Down
Loading