Skip to content

Commit

Permalink
Instrument Solver Engine (#2129)
Browse files Browse the repository at this point in the history
# Description
Solver engines are currently not exposing any metrics. This PR changes
that.

# Changes
- [ ] Collect histogram of initial time_limit when requests come in
- [ ] Collect histogram of remaining time when requests are done
- [ ] Collect number of proposed solutions
- [ ] Collect failure reasons

## How to test
Run everything locally and visit http://localhost:7872/metrics

Once merged, this will require an infra change to start scraping metrics
in prometheus.

## Related Issues

Fixes #1239
  • Loading branch information
fleupold authored Dec 8, 2023
1 parent 54da168 commit 03ed19b
Show file tree
Hide file tree
Showing 13 changed files with 98 additions and 26 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 1 addition & 6 deletions crates/driver/src/infra/api/routes/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
use prometheus::Encoder;

pub(in crate::infra::api) fn metrics(app: axum::Router<()>) -> axum::Router<()> {
app.route("/metrics", axum::routing::get(route))
}

async fn route() -> String {
let registry = observe::metrics::get_registry();
let encoder = prometheus::TextEncoder::new();
let mut buffer = Vec::new();
encoder.encode(&registry.gather(), &mut buffer).unwrap();
String::from_utf8(buffer).unwrap()
observe::metrics::encode(registry)
}
9 changes: 8 additions & 1 deletion crates/observe/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use {once_cell::sync::OnceCell, std::collections::HashMap};
use {once_cell::sync::OnceCell, prometheus::Encoder, std::collections::HashMap};

/// Global metrics registry used by all components.
static REGISTRY: OnceCell<prometheus_metric_storage::StorageRegistry> = OnceCell::new();
Expand Down Expand Up @@ -51,3 +51,10 @@ pub fn get_registry() -> &'static prometheus::Registry {
pub fn get_storage_registry() -> &'static prometheus_metric_storage::StorageRegistry {
REGISTRY.get_or_init(prometheus_metric_storage::StorageRegistry::default)
}

pub fn encode(registry: &prometheus::Registry) -> String {
let encoder = prometheus::TextEncoder::new();
let mut buffer = Vec::new();
encoder.encode(&registry.gather(), &mut buffer).unwrap();
String::from_utf8(buffer).unwrap()
}
16 changes: 1 addition & 15 deletions crates/shared/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use {
prometheus::Encoder,
std::{convert::Infallible, net::SocketAddr, sync::Arc},
tokio::task::{self, JoinHandle},
warp::{Filter, Rejection, Reply},
Expand All @@ -21,20 +20,7 @@ pub fn serve_metrics(liveness: Arc<dyn LivenessChecking>, address: SocketAddr) -
// `/metrics` route exposing encoded prometheus data to monitoring system
pub fn handle_metrics() -> impl Filter<Extract = (impl Reply,), Error = Rejection> + Clone {
let registry = observe::metrics::get_registry();
warp::path("metrics").map(move || {
let encoder = prometheus::TextEncoder::new();
let mut buffer = Vec::new();
if let Err(e) = encoder.encode(&registry.gather(), &mut buffer) {
tracing::error!("could not encode metrics: {}", e);
};
match String::from_utf8(buffer) {
Ok(v) => v,
Err(e) => {
tracing::error!("metrics could not be from_utf8'd: {}", e);
String::default()
}
}
})
warp::path("metrics").map(move || observe::metrics::encode(registry))
}

fn handle_liveness(
Expand Down
2 changes: 2 additions & 0 deletions crates/solvers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ hex = "0.4"
hyper = "0.14"
itertools = "0.11"
num = "0.4"
prometheus = { workspace = true }
prometheus-metric-storage = { workspace = true }
reqwest = "0.11"
serde = "1"
serde_json = "1"
Expand Down
1 change: 1 addition & 0 deletions crates/solvers/src/api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ impl Api {
shutdown: impl Future<Output = ()> + Send + 'static,
) -> Result<(), hyper::Error> {
let app = axum::Router::new()
.route("/metrics", axum::routing::get(routes::metrics))
.route("/healthz", axum::routing::get(routes::healthz))
.route("/solve", axum::routing::post(routes::solve))
.route("/notify", axum::routing::post(routes::notify))
Expand Down
4 changes: 4 additions & 0 deletions crates/solvers/src/api/routes/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub async fn metrics() -> String {
let registry = observe::metrics::get_registry();
observe::metrics::encode(registry)
}
3 changes: 2 additions & 1 deletion crates/solvers/src/api/routes/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use serde::Serialize;

mod healthz;
mod metrics;
mod notify;
mod solve;

pub(super) use {healthz::healthz, notify::notify, solve::solve};
pub(super) use {healthz::healthz, metrics::metrics, notify::notify, solve::solve};

#[derive(Debug, Serialize)]
#[serde(untagged)]
Expand Down
1 change: 1 addition & 0 deletions crates/solvers/src/domain/solver/dex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ impl Dex {
gas_price: auction::GasPrice,
) -> Option<dex::Swap> {
let dex_err_handler = |err: infra::dex::Error| {
infra::metrics::solve_error(err.format_variant());
match &err {
err @ infra::dex::Error::NotFound => {
if order.partially_fillable {
Expand Down
13 changes: 10 additions & 3 deletions crates/solvers/src/domain/solver/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use crate::domain::{auction, notification, solution};
use crate::{
domain::{auction, notification, solution},
infra::metrics,
};

pub mod baseline;
pub mod dex;
Expand All @@ -19,12 +22,16 @@ impl Solver {
/// returning multiple solutions to later merge multiple non-overlapping
/// solutions to get one big more gas efficient solution.
pub async fn solve(&self, auction: auction::Auction) -> Vec<solution::Solution> {
match self {
metrics::solve(&auction);
let deadline = auction.deadline.clone();
let solutions = match self {
Solver::Baseline(solver) => solver.solve(auction).await,
Solver::Naive(solver) => solver.solve(auction).await,
Solver::Legacy(solver) => solver.solve(auction).await,
Solver::Dex(solver) => solver.solve(auction).await,
}
};
metrics::solved(&deadline, &solutions);
solutions
}

/// Notifies the solver about important events. Some of those events are
Expand Down
12 changes: 12 additions & 0 deletions crates/solvers/src/infra/dex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,18 @@ pub enum Error {
Other(Box<dyn std::error::Error + Send + Sync>),
}

impl Error {
/// for instrumentization purposes
pub fn format_variant(&self) -> &'static str {
match self {
Self::OrderNotSupported => "OrderNotSupported",
Self::NotFound => "NotFound",
Self::RateLimited => "RateLimited",
Self::Other(_) => "Other",
}
}
}

impl From<balancer::Error> for Error {
fn from(err: balancer::Error) -> Self {
match err {
Expand Down
53 changes: 53 additions & 0 deletions crates/solvers/src/infra/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
use crate::domain::{auction, solution};

/// Metrics for the solver engine.
#[derive(Debug, Clone, prometheus_metric_storage::MetricStorage)]
#[metric(subsystem = "solver_engine")]
struct Metrics {
/// The amount of time this solver engine has for solving.
#[metric(buckets(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))]
time_limit: prometheus::Histogram,

/// The amount of time this solver engine has left when it finished solving.
#[metric(buckets(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))]
remaining_time: prometheus::Histogram,

/// Errors that occurred during solving.
#[metric(labels("reason"))]
solve_errors: prometheus::IntCounterVec,

/// The number of solutions that were found.
solutions: prometheus::IntCounter,
}

/// Setup the metrics registry.
pub fn init() {
observe::metrics::setup_registry_reentrant(Some("solver-engine".to_owned()), None);
}

pub fn solve(auction: &auction::Auction) {
get().time_limit.observe(
auction
.deadline
.remaining()
.unwrap_or_default()
.as_secs_f64(),
);
}

pub fn solved(deadline: &auction::Deadline, solutions: &[solution::Solution]) {
get()
.remaining_time
.observe(deadline.remaining().unwrap_or_default().as_secs_f64());
get().solutions.inc_by(solutions.len() as u64);
}

pub fn solve_error(reason: &str) {
get().solve_errors.with_label_values(&[reason]).inc();
}

/// Get the metrics instance.
fn get() -> &'static Metrics {
Metrics::instance(observe::metrics::get_storage_registry())
.expect("unexpected error getting metrics instance")
}
1 change: 1 addition & 0 deletions crates/solvers/src/infra/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ pub mod cli;
pub mod config;
pub mod contracts;
pub mod dex;
pub mod metrics;

0 comments on commit 03ed19b

Please sign in to comment.