Skip to content

Commit

Permalink
feat(rt): expose tokio runtime metrics (#3177)
Browse files Browse the repository at this point in the history
Tokio has an unstable feature supporting runtime metrics.

This allows service operators to export metrics to systems like
Prometheus, so that they can observe how their workloads are performing
on the tokio runtime. This exposes information like the number of worker
threads, queue depth, the number of tasks polled, and so on.

`linkerd2-proxy` should expose these metrics.

This uses the `kubert-prometheus-tokio` crate to register a `Runtime`
metrics worker, and spawn a task to probe these metrics at a fixed,
regular interval.

see: <https://github.com/olix0r/kubert/tree/main/kubert-prometheus-tokio>

If the `tokio_unstable` cfg is not set at build time (via
`RUSTFLAGS="--cfg tokio_unstable"`), this will emit a debug event and do
nothing.

Signed-off-by: katelyn martin <[email protected]>
  • Loading branch information
cratelyn authored Sep 10, 2024
1 parent 23e3c4e commit 193dcaf
Show file tree
Hide file tree
Showing 12 changed files with 85 additions and 9 deletions.
1 change: 1 addition & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"remoteUser": "code",
"containerEnv": {
"CXX": "clang++-14",
"RUSTFLAGS": "--cfg tokio_unstable"
},
"mounts": [
{
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/beta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ env:
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
RUSTUP_MAX_RETRIES: 10
RUSTFLAGS: "-D warnings"
RUSTFLAGS: "-D warnings --cfg tokio_unstable"

permissions:
contents: read
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ concurrency:
env:
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
RUSTFLAGS: "-D warnings -A deprecated -C debuginfo=2"
RUSTFLAGS: "-D warnings -A deprecated --cfg tokio_unstable -C debuginfo=2"
RUSTUP_MAX_RETRIES: 10

jobs:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/fuzzers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ env:
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
RUST_BACKTRACE: short
RUSTFLAGS: "-D warnings -A deprecated -C debuginfo=0"
RUSTFLAGS: "-D warnings -A deprecated --cfg tokio_unstable -C debuginfo=0"
RUSTUP_MAX_RETRIES: 10

permissions:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ on:
env:
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
RUSTFLAGS: "-D warnings -A opaque_hidden_inferred_bound -C debuginfo=0"
RUSTFLAGS: "-D warnings -A opaque_hidden_inferred_bound --cfg tokio_unstable -C debuginfo=0"
RUSTUP_MAX_RETRIES: 10

permissions:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ env:
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
RUSTUP_MAX_RETRIES: 10
RUSTFLAGS: "-D warnings -D deprecated -C debuginfo=0"
RUSTFLAGS: "-D warnings -D deprecated --cfg tokio_unstable -C debuginfo=0"

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ on:
env:
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
RUSTFLAGS: "-D warnings -A deprecated"
RUSTFLAGS: "-D warnings -A deprecated --cfg tokio_unstable"
RUSTUP_MAX_RETRIES: 10

concurrency:
Expand Down
37 changes: 37 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,17 @@ version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"

[[package]]
name = "futures-macro"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

[[package]]
name = "futures-sink"
version = "0.3.30"
Expand All @@ -674,6 +685,7 @@ dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
Expand Down Expand Up @@ -1020,6 +1032,18 @@ dependencies = [
"libc",
]

[[package]]
name = "kubert-prometheus-tokio"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a101fa3df488c89130664aaf4652986da49e204fb1725d089122f75b22ff6cbb"
dependencies = [
"prometheus-client",
"tokio",
"tokio-metrics",
"tracing",
]

[[package]]
name = "lazy_static"
version = "1.5.0"
Expand Down Expand Up @@ -2296,6 +2320,7 @@ version = "0.1.0"
dependencies = [
"futures",
"jemallocator",
"kubert-prometheus-tokio",
"linkerd-app",
"linkerd-meshtls",
"linkerd-metrics",
Expand Down Expand Up @@ -3378,6 +3403,18 @@ dependencies = [
"syn",
]

[[package]]
name = "tokio-metrics"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eace09241d62c98b7eeb1107d4c5c64ca3bd7da92e8c218c153ab3a78f9be112"
dependencies = [
"futures-util",
"pin-project-lite",
"tokio",
"tokio-stream",
]

[[package]]
name = "tokio-rustls"
version = "0.24.1"
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ RUN --mount=type=cache,id=cargo,target=/usr/local/cargo/registry \
# Build the proxy.
FROM fetch as build
ENV CARGO_INCREMENTAL=0
ENV RUSTFLAGS="-D warnings -A deprecated"
ENV RUSTFLAGS="-D warnings -A deprecated --cfg tokio_unstable"
ARG TARGETARCH="amd64"
ARG PROFILE="release"
ARG LINKERD2_PROXY_VERSION=""
Expand Down
3 changes: 2 additions & 1 deletion linkerd2-proxy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ pprof = ["linkerd-app/pprof"]

[dependencies]
futures = { version = "0.3", default-features = false }
num_cpus = { version = "1", optional = true }
kubert-prometheus-tokio = { version = "0.1", features = ["rt"] }
linkerd-app = { path = "../linkerd/app" }
linkerd-metrics = { path = "../linkerd/metrics" }
# We don't actually use code from this crate in `main`; it's here only so we can
# control its feature flags.
linkerd-meshtls = { path = "../linkerd/meshtls" }
linkerd-signal = { path = "../linkerd/signal" }
num_cpus = { version = "1", optional = true }
tokio = { version = "1", features = ["rt", "time", "net"] }
tracing = "0.1"

Expand Down
5 changes: 4 additions & 1 deletion linkerd2-proxy/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ fn main() {
vendor = BUILD_INFO.vendor,
);

let metrics = linkerd_metrics::prom::Registry::default();
let mut metrics = linkerd_metrics::prom::Registry::default();

// Load configuration from the environment without binding ports.
let config = match Config::try_from_env() {
Expand All @@ -57,6 +57,9 @@ fn main() {
// `LINKERD2_PROXY_CORES` env or the number of available CPUs (as provided
// by cgroups, when possible).
rt::build().block_on(async move {
// Spawn a task to run in the background, exporting runtime metrics at a regular interval.
rt::spawn_metrics_exporter(&mut metrics);

let (shutdown_tx, mut shutdown_rx) = mpsc::unbounded_channel();
let shutdown_grace_period = config.shutdown_grace_period;

Expand Down
34 changes: 34 additions & 0 deletions linkerd2-proxy/src/rt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,37 @@ pub(crate) fn build() -> Runtime {
.build()
.expect("failed to build basic runtime!")
}

/// Spawns a task to scrape metrics for the current runtime at a regular interval.
///
/// The runtime metrics are registered in `registry` under the `tokio_rt` prefix, and a
/// background task is spawned to update them on a fixed one-second interval. This must be
/// called from within a tokio runtime, because it uses the current runtime handle and spawns
/// the task onto it.
///
/// Note that this function requires unstable tokio functionality that must be enabled via the
/// `tokio_unstable` cfg (this is a compiler cfg, not a cargo feature). When it is not set, no
/// metrics will be registered and only a debug event is emitted.
///
/// `RUSTFLAGS="--cfg tokio_unstable"` must be set at build-time to use this feature.
pub fn spawn_metrics_exporter(registry: &mut linkerd_metrics::prom::Registry) {
    #[cfg(tokio_unstable)]
    {
        use {std::time::Duration, tracing::Instrument};

        /// The fixed interval at which tokio runtime metrics are updated.
        //
        // TODO(kate): perhaps this could be configurable eventually. for now, it's hard-coded.
        const INTERVAL: Duration = Duration::from_secs(1);

        let mut interval = tokio::time::interval(INTERVAL);

        let registry = registry.sub_registry_with_prefix("tokio_rt");
        let runtime = tokio::runtime::Handle::current();
        let metrics = kubert_prometheus_tokio::Runtime::register(registry, runtime);

        // The join handle is intentionally dropped: the exporter task is detached and runs in
        // the background for as long as the runtime does.
        tokio::spawn(
            async move { metrics.updated(&mut interval).await }
                .instrument(tracing::info_span!("kubert-prom-tokio-rt")),
        );
    }
    #[cfg(not(tokio_unstable))]
    {
        // `registry` is only used when `tokio_unstable` is set. Consume the binding explicitly
        // so that this configuration does not trip the `unused_variables` lint, which is a hard
        // error when building with `-D warnings`.
        let _ = registry;
        tracing::debug!("Tokio runtime metrics cannot be monitored without the tokio_unstable cfg");
    }
}

0 comments on commit 193dcaf

Please sign in to comment.