diff --git a/docs/bridge/docs/rfq/Relayer/Relayer.md b/docs/bridge/docs/rfq/Relayer/Relayer.md index 612dcb25ed..b2969fd683 100644 --- a/docs/bridge/docs/rfq/Relayer/Relayer.md +++ b/docs/bridge/docs/rfq/Relayer/Relayer.md @@ -231,3 +231,4 @@ The metrics exposed by the relayer are: - `inventory_balance`: The balance of the inventory on the chain for a given `token_name` and `relayer`. - `quote_amount`: The amount quoted for a given `token_name` and `relayer`. +- `status_count`: The distribution of non-terminal `QuoteRequestStatus` values over time. diff --git a/services/rfq/go.mod b/services/rfq/go.mod index a66c26c13a..73f87732f5 100644 --- a/services/rfq/go.mod +++ b/services/rfq/go.mod @@ -8,6 +8,7 @@ require ( github.com/Flaque/filet v0.0.0-20201012163910-45f684403088 github.com/alecthomas/assert v1.0.0 github.com/brianvoe/gofakeit/v6 v6.27.0 + github.com/cornelk/hashmap v1.0.8 github.com/dubonzi/otelresty v1.3.0 github.com/ethereum/go-ethereum v1.13.8 github.com/gin-gonic/gin v1.10.0 @@ -107,7 +108,6 @@ require ( github.com/consensys/bavard v0.1.13 // indirect github.com/consensys/gnark-crypto v0.12.1 // indirect github.com/containerd/continuity v0.4.2 // indirect - github.com/cornelk/hashmap v1.0.8 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/crate-crypto/go-ipa v0.0.0-20231025140028-3c0104f4b233 // indirect github.com/crate-crypto/go-kzg-4844 v0.7.0 // indirect diff --git a/services/rfq/relayer/dashboards/dashboard.json b/services/rfq/relayer/dashboards/dashboard.json index 3e99e850e3..2a2af48804 100644 --- a/services/rfq/relayer/dashboards/dashboard.json +++ b/services/rfq/relayer/dashboards/dashboard.json @@ -1,41 +1,4 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__elements": {}, - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "10.1.2" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "text", - "name": "Text", - "version": "" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - } - ], "annotations": { "list": [ { @@ -55,14 +18,14 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": null, + "id": 770, "links": [], "liveNow": false, "panels": [ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "", "gridPos": { @@ -88,7 +51,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "ETH balances of the RFQ relayer by chain.", "fieldConfig": { @@ -167,7 +130,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", "expr": "avg(inventory_balance{token_name=\"ETH\",relayer=\"$relayer\"}) by (chain_id)", @@ -183,7 +146,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "USDC balances of the RFQ relayer by chain.", "fieldConfig": { @@ -262,7 +225,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", "expr": "avg(inventory_balance{token_name=\"USD Coin\",relayer=\"$relayer\"}) by (chain_id)", @@ -278,7 +241,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "Displays the ETH quote amount for each origin -> destination path.", "fieldConfig": { @@ -357,7 +320,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", "expr": "avg(quote_amount{origin_token_name=\"ETH\", dest_token_name=\"ETH\"}) by (origin_chain_id, dest_chain_id)", @@ -373,7 +336,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "Displays the USDC quote amount for each origin -> destination path.", "fieldConfig": { @@ -452,7 +415,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", "expr": "avg(quote_amount{origin_token_name=\"USD Coin\"} / 1e6) by (origin_chain_id, destination_chain_id)", @@ -465,6 +428,105 @@ ], "title": "USDC Quote Sizing", "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Displays the count of any non-terminal QuoteRequestStatus values over time.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "max by(status) (status_count{wallet=\"$relayer\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Request Statuses", + "type": "timeseries" } ], "refresh": "", @@ -474,10 +536,14 @@ "templating": { "list": [ { - "current": {}, + "current": { + "selected": false, + "text": "0xDD50676F81f607fD8bA7Ed3187DdF172DB174CD3", + "value": "0xDD50676F81f607fD8bA7Ed3187DdF172DB174CD3" + }, "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "definition": "label_values(inventory_balance,relayer)", "description": "Address of the relayer displayed in this dashboard.", @@ -507,6 +573,6 @@ "timezone": "", "title": "RFQ Relayer", "uid": "c394e799-062f-4bfd-90d6-b94f2932575a", - "version": 17, + "version": 18, "weekStart": "" } diff --git a/services/rfq/relayer/reldb/base/quote.go b/services/rfq/relayer/reldb/base/quote.go index 8594f9320b..3cd1b7c61e 100644 --- a/services/rfq/relayer/reldb/base/quote.go +++ b/services/rfq/relayer/reldb/base/quote.go @@ -138,3 +138,35 @@ func isValidStateTransition(prevStatus, status reldb.QuoteRequestStatus) bool { } return status >= prevStatus } + +type statusCount struct { + Status int + Count int64 +} + +// GetStatusCounts gets the counts of quote requests by status. +func (s Store) GetStatusCounts(ctx context.Context, matchStatuses ...reldb.QuoteRequestStatus) (map[reldb.QuoteRequestStatus]int, error) { + inArgs := make([]int, len(matchStatuses)) + for i := range matchStatuses { + inArgs[i] = int(matchStatuses[i].Int()) + } + + var results []statusCount + tx := s.DB(). + WithContext(ctx). + Model(&RequestForQuote{}). + Select(fmt.Sprintf("%s, COUNT(*) as count", statusFieldName)). + Where(fmt.Sprintf("%s IN ?", statusFieldName), inArgs). + Group(statusFieldName). + Scan(&results) + if tx.Error != nil { + return nil, fmt.Errorf("could not get db results: %w", tx.Error) + } + + statuses := make(map[reldb.QuoteRequestStatus]int) + for _, result := range results { + statuses[reldb.QuoteRequestStatus(result.Status)] = int(result.Count) + } + + return statuses, nil +} diff --git a/services/rfq/relayer/reldb/db.go b/services/rfq/relayer/reldb/db.go index c63ad75527..0d7ada6a45 100644 --- a/services/rfq/relayer/reldb/db.go +++ b/services/rfq/relayer/reldb/db.go @@ -48,6 +48,8 @@ type Reader interface { GetRebalanceByID(ctx context.Context, rebalanceID string) (*Rebalance, error) // GetDBStats gets the database stats. GetDBStats(ctx context.Context) (*sql.DBStats, error) + // GetStatusCounts gets the counts of quote requests by status. + GetStatusCounts(ctx context.Context, matchStatuses ...QuoteRequestStatus) (map[QuoteRequestStatus]int, error) } // Service is the interface for the database service. diff --git a/services/rfq/relayer/service/otel.go b/services/rfq/relayer/service/otel.go new file mode 100644 index 0000000000..e59fb5b251 --- /dev/null +++ b/services/rfq/relayer/service/otel.go @@ -0,0 +1,82 @@ +package service + +import ( + "context" + "fmt" + + "github.com/cornelk/hashmap" + "github.com/synapsecns/sanguine/core/metrics" + "github.com/synapsecns/sanguine/ethergo/signer/signer" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" +) + +const meterName = "github.com/synapsecns/sanguine/services/rfq/relayer/service" + +// generate an interface for otelRecorder that exports the public method. +// this allows us to avoid using recordX externally anad makes the package less confusing. +// +// ============================================================================= +// ============================================================================= +// IMPORTANT: DO NOT REMOVE THIS COMMENT. +// NOTICE: PLEASE MAKE SURE YOU UPDATE BOTH THE DOCS AND THE GRAFANA DASHBOARD (IF NEEDED) AFTER UPDATING METRICS. +// ============================================================================= +// ============================================================================= +// +//go:generate go run github.com/vburenin/ifacemaker -f otel.go -s otelRecorder -i iOtelRecorder -p service -o otel_generated.go -c "autogenerated file" +type otelRecorder struct { + metrics metrics.Handler + // meter is the metrics meter. + meter metric.Meter + // statusCountGauge is the gauge for the status. + statusCountGauge metric.Int64ObservableGauge + // statusCounts is used for metrics. + // status -> count + statusCounts *hashmap.Map[int, int] + // signer is the signer for signing transactions. + signer signer.Signer +} + +func newOtelRecorder(meterHandler metrics.Handler, signer signer.Signer) (_ iOtelRecorder, err error) { + or := otelRecorder{ + metrics: meterHandler, + meter: meterHandler.Meter(meterName), + statusCounts: hashmap.New[int, int](), + signer: signer, + } + + or.statusCountGauge, err = or.meter.Int64ObservableGauge("status_count") + if err != nil { + return nil, fmt.Errorf("could not create status count gauge: %w", err) + } + + _, err = or.meter.RegisterCallback(or.recordStatusCounts, or.statusCountGauge) + if err != nil { + return nil, fmt.Errorf("could not register callback for status count gauge: %w", err) + } + + return &or, nil +} + +func (o *otelRecorder) recordStatusCounts(_ context.Context, observer metric.Observer) (err error) { + if o.metrics == nil || o.statusCountGauge == nil || o.statusCounts == nil { + return nil + } + + o.statusCounts.Range(func(status int, count int) bool { + opts := metric.WithAttributes( + attribute.Int("status", status), + attribute.String("wallet", o.signer.Address().Hex()), + ) + observer.ObserveInt64(o.statusCountGauge, int64(count), opts) + + return true + }) + + return nil +} + +// RecordStatusCounts records the request status count. +func (o *otelRecorder) RecordStatusCount(status, count int) { + o.statusCounts.Set(status, count) +} diff --git a/services/rfq/relayer/service/otel_generated.go b/services/rfq/relayer/service/otel_generated.go new file mode 100644 index 0000000000..cd4b12b00c --- /dev/null +++ b/services/rfq/relayer/service/otel_generated.go @@ -0,0 +1,9 @@ +// autogenerated file + +package service + +// iOtelRecorder ... +type iOtelRecorder interface { + // RecordStatusCounts records the request status count. + RecordStatusCount(status, count int) +} diff --git a/services/rfq/relayer/service/relayer.go b/services/rfq/relayer/service/relayer.go index 4a7ffa3569..9cc5de3578 100644 --- a/services/rfq/relayer/service/relayer.go +++ b/services/rfq/relayer/service/relayer.go @@ -62,7 +62,8 @@ type Relayer struct { // semaphore is used to limit the number of concurrent requests semaphore *semaphore.Weighted // handlerMtx is used to synchronize handling of relay requests - handlerMtx mapmutex.StringMapMutex + handlerMtx mapmutex.StringMapMutex + otelRecorder iOtelRecorder } var logger = log.Logger("relayer") @@ -142,6 +143,11 @@ func NewRelayer(ctx context.Context, metricHandler metrics.Handler, cfg relconfi return nil, fmt.Errorf("could not get api server: %w", err) } + otelRecorder, err := newOtelRecorder(metricHandler, sg) + if err != nil { + return nil, fmt.Errorf("could not get otel recorder: %w", err) + } + cache := ttlcache.New[common.Hash, bool](ttlcache.WithTTL[common.Hash, bool](time.Second * 30)) rel := Relayer{ db: store, @@ -159,6 +165,7 @@ func NewRelayer(ctx context.Context, metricHandler metrics.Handler, cfg relconfi apiClient: apiClient, semaphore: semaphore.NewWeighted(maxConcurrentRequests), handlerMtx: mapmutex.NewStringMapMutex(), + otelRecorder: otelRecorder, } return &rel, nil } @@ -280,6 +287,14 @@ func (r *Relayer) Start(ctx context.Context) (err error) { return nil }) + g.Go(func() error { + err = r.recordMetrics(ctx) + if err != nil { + return fmt.Errorf("could not record metrics: %w", err) + } + return nil + }) + err = g.Wait() if err != nil { return fmt.Errorf("could not start: %w", err) @@ -360,6 +375,25 @@ func (r *Relayer) startGuard(ctx context.Context) (err error) { return nil } +const defaultMetricsInterval = 10 + +func (r *Relayer) recordMetrics(ctx context.Context) (err error) { + for { + select { + case <-ctx.Done(): + return fmt.Errorf("could not record metrics: %w", ctx.Err()) + case <-time.After(defaultMetricsInterval * time.Second): + statusCounts, err := r.db.GetStatusCounts(ctx, reldb.Seen, reldb.NotEnoughInventory, reldb.CommittedPending, reldb.CommittedConfirmed, reldb.RelayStarted, reldb.RelayCompleted, reldb.ProvePosting, reldb.ProvePosted, reldb.ClaimPending) + if err != nil { + return fmt.Errorf("could not get status counts: %w", err) + } + for status, count := range statusCounts { + r.otelRecorder.RecordStatusCount(int(status.Int()), count) + } + } + } +} + func (r *Relayer) processDB(ctx context.Context, serial bool, matchStatuses ...reldb.QuoteRequestStatus) (err error) { ctx, span := r.metrics.Tracer().Start(ctx, "processDB", trace.WithAttributes( attribute.Bool("serial", serial),