Skip to content

Commit

Permalink
chore: active orders observability TODOs (#469)
Browse files Browse the repository at this point in the history
* fix: active orders overflow bug

* chore: active orders observability TODOs

* updates

* lint

* chore: active orders observability TODOs

Introduce Prometheus error counters

---------

Co-authored-by: Deividas Petraitis <[email protected]>
(cherry picked from commit 4f5b8b8)
  • Loading branch information
p0mvn authored and mergify[bot] committed Aug 26, 2024
1 parent 0583181 commit bf78411
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 7 deletions.
17 changes: 17 additions & 0 deletions domain/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ var (
// * height - the height of the block being processed
SQSIngestUsecaseProcessBlockDurationMetricName = "sqs_ingest_usecase_process_block_duration"

// sqs_ingest_usecase_process_orderbook_pool_error_total
//
// counter that measures the number of errors that occur during processing an orderbook pool in ingest usecase
//
// Has the following labels:
// * err - the error message occurred
// * pool_id - the identifier of the pool being processed
SQSIngestUsecaseProcessOrderbookPoolErrorMetricName = "sqs_ingest_usecase_process_orderbook_pool_error_total"

// sqs_ingest_usecase_process_block_error
//
// counter that measures the number of errors that occur during processing a block in ingest usecase
Expand Down Expand Up @@ -148,6 +157,13 @@ var (
},
)

SQSIngestHandlerProcessOrderbookPoolErrorCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Name: SQSIngestUsecaseProcessOrderbookPoolErrorMetricName,
Help: "counter that measures the number of errors that occur during processing an orderbook pool in ingest usecase",
},
)

SQSIngestHandlerPoolParseErrorCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Name: SQSIngestUsecaseParsePoolErrorMetricName,
Expand Down Expand Up @@ -279,6 +295,7 @@ var (
func init() {
prometheus.MustRegister(SQSIngestHandlerProcessBlockDurationGauge)
prometheus.MustRegister(SQSIngestHandlerProcessBlockErrorCounter)
prometheus.MustRegister(SQSIngestHandlerProcessOrderbookPoolErrorCounter)
prometheus.MustRegister(SQSIngestHandlerPoolParseErrorCounter)
prometheus.MustRegister(SQSPricingWorkerComputeDurationGauge)
prometheus.MustRegister(SQSPricingWorkerComputeErrorCounter)
Expand Down
3 changes: 2 additions & 1 deletion ingest/usecase/ingest_usecase.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,8 @@ func (p *ingestUseCase) parsePoolData(ctx context.Context, poolData []*types.Poo
// and to avoid potential deadlock.
go func() {
if err := p.orderBookUseCase.ProcessPool(ctx, poolResult.pool); err != nil {
p.logger.Error("failed to process orderbook pool", zap.Error(err), zap.Uint64("pool_id", poolID))
domain.SQSIngestHandlerProcessOrderbookPoolErrorCounter.Inc()
p.logger.Error(domain.SQSIngestUsecaseProcessOrderbookPoolErrorMetricName, zap.Error(err), zap.Uint64("pool_id", poolID))
}
}()
}
Expand Down
56 changes: 56 additions & 0 deletions orderbook/telemetry/telemetry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package telemetry

import "github.com/prometheus/client_golang/prometheus"

// Metric names used by the orderbook use case telemetry.
//
// NOTE(review): every counter in this file is built with prometheus.NewCounter,
// which carries no label dimensions. The "labels" listed under a name are
// therefore not Prometheus labels as far as this file shows; presumably they
// are the structured fields logged next to the metric name — confirm against
// the callers.
var (
	// GetActiveOrdersErrorMetricName names the counter
	// sqs_orderbook_usecase_get_active_orders_error_total, which counts
	// errors that occur while getting active orders in the orderbook usecase.
	//
	// Documented labels:
	//   - contract: the address of the orderbook contract
	//   - address: address of the user wallet
	//   - err: the error message occurred
	GetActiveOrdersErrorMetricName = "sqs_orderbook_usecase_get_active_orders_error_total"

	// GetTickByIDNotFoundMetricName names the counter
	// sqs_orderbook_usecase_get_tick_by_id_not_found_total, which counts
	// how many times a tick is not found by id in the orderbook usecase.
	GetTickByIDNotFoundMetricName = "sqs_orderbook_usecase_get_tick_by_id_not_found_total"

	// CreateLimitOrderErrorMetricName names the counter
	// sqs_orderbook_usecase_create_limit_order_error_total, which counts
	// errors that occur while creating a limit order in the orderbook.
	//
	// Documented labels:
	//   - order: the order from orderbook that was attempted to be created as a limit order
	//   - err: the error message occurred
	CreateLimitOrderErrorMetricName = "sqs_orderbook_usecase_create_limit_order_error_total"
)

// Prometheus counters backing the metric names declared above.
// Each one is a plain, label-less counter configured only with a name and
// a help text.
var (
	// GetActiveOrdersErrorCounter is the counter published under
	// GetActiveOrdersErrorMetricName.
	GetActiveOrdersErrorCounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: GetActiveOrdersErrorMetricName,
		Help: "counter that measures the number of errors that occur during retrieving active orders from orderbook contract",
	})

	// GetTickByIDNotFoundCounter is the counter published under
	// GetTickByIDNotFoundMetricName.
	GetTickByIDNotFoundCounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: GetTickByIDNotFoundMetricName,
		Help: "counter that measures the number of not found ticks by ID that occur during retrieving active orders from orderbook contract",
	})

	// CreateLimitOrderErrorCounter is the counter published under
	// CreateLimitOrderErrorMetricName.
	CreateLimitOrderErrorCounter = prometheus.NewCounter(prometheus.CounterOpts{
		Name: CreateLimitOrderErrorMetricName,
		Help: "counter that measures the number errors that occur during creating a limit order orderbook from orderbook order",
	})
)

// init registers the orderbook telemetry counters with the default
// Prometheus registry. MustRegister panics if a registration fails, so a
// failure here surfaces at program start-up.
func init() {
	// Collectors are registered in the listed order, matching three
	// separate MustRegister calls.
	prometheus.MustRegister(
		GetActiveOrdersErrorCounter,
		GetTickByIDNotFoundCounter,
		CreateLimitOrderErrorCounter,
	)
}
13 changes: 7 additions & 6 deletions orderbook/usecase/orderbook_usecase.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
orderbookdomain "github.com/osmosis-labs/sqs/domain/orderbook"
orderbookgrpcclientdomain "github.com/osmosis-labs/sqs/domain/orderbook/grpcclient"
"github.com/osmosis-labs/sqs/log"
"github.com/osmosis-labs/sqs/orderbook/telemetry"
"github.com/osmosis-labs/sqs/sqsdomain"
"go.uber.org/zap"

Expand Down Expand Up @@ -131,7 +132,8 @@ func (o *orderbookUseCaseImpl) GetActiveOrders(ctx context.Context, address stri
for _, orderbook := range orderbooks {
orders, count, err := o.orderBookClient.GetActiveOrders(context.TODO(), orderbook.ContractAddress, address)
if err != nil {
o.logger.Info("failed to fetch active orders", zap.Any("contract", orderbook.ContractAddress), zap.Any("contract", address), zap.Any("err", err))
telemetry.GetActiveOrdersErrorCounter.Inc()
o.logger.Error(telemetry.GetActiveOrdersErrorMetricName, zap.Any("contract", orderbook.ContractAddress), zap.Any("contract", address), zap.Any("err", err))
continue
}

Expand All @@ -157,10 +159,8 @@ func (o *orderbookUseCaseImpl) GetActiveOrders(ctx context.Context, address stri
for _, order := range orders {
repositoryTick, ok := o.orderbookRepository.GetTickByID(orderbook.PoolID, order.TickId)
if !ok {
o.logger.Info("tick not found", zap.Any("contract", orderbook.ContractAddress), zap.Any("ticks", order.TickId), zap.Any("ok", ok))

// TODO: if tick not found, add an alert
// Prometheus metric counter and alert
telemetry.GetTickByIDNotFoundCounter.Inc()
o.logger.Info(telemetry.GetTickByIDNotFoundMetricName, zap.Any("contract", orderbook.ContractAddress), zap.Any("ticks", order.TickId), zap.Any("ok", ok))
}

result, err := o.createLimitOrder(
Expand All @@ -178,7 +178,8 @@ func (o *orderbookUseCaseImpl) GetActiveOrders(ctx context.Context, address stri
orderbook.ContractAddress,
)
if err != nil {
o.logger.Info("failed to create limit order", zap.Any("order", order), zap.Any("err", err))
telemetry.CreateLimitOrderErrorCounter.Inc()
o.logger.Error(telemetry.CreateLimitOrderErrorMetricName, zap.Any("order", order), zap.Any("err", err))
continue
}

Expand Down

0 comments on commit bf78411

Please sign in to comment.