[ENH] Latency histograms for get/insert/remove/clear of cache. #3018
Changes from all commits
@@ -5,6 +5,56 @@ use opentelemetry_sdk::propagation::TraceContextPropagator;
use tracing_bunyan_formatter::BunyanFormattingLayer;
use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Layer};

#[derive(Clone, Debug, Default)]
struct ChromaShouldSample;

const BUSY_NS: opentelemetry::Key = opentelemetry::Key::from_static_str("busy_ns");
const IDLE_NS: opentelemetry::Key = opentelemetry::Key::from_static_str("idle_ns");

// A span counts as slow when its combined busy + idle time exceeds 1ms (1_000_000ns).
fn is_slow(attributes: &[opentelemetry::KeyValue]) -> bool {
    let mut nanos = 0i64;
    for attr in attributes {
        if attr.key == BUSY_NS || attr.key == IDLE_NS {
            if let opentelemetry::Value::I64(ns) = attr.value {
                nanos += ns;
            }
        }
    }
    nanos > 1_000_000
}
impl opentelemetry_sdk::trace::ShouldSample for ChromaShouldSample {
    fn should_sample(
        &self,
        _: Option<&opentelemetry::Context>,
        _: opentelemetry::trace::TraceId,
        name: &str,
        _: &opentelemetry::trace::SpanKind,
        attributes: &[opentelemetry::KeyValue],
        _: &[opentelemetry::trace::Link],
    ) -> opentelemetry::trace::SamplingResult {
        // NOTE(rescrv): THIS IS A HACK! If you find yourself seriously extending it, it's time
        // to investigate honeycomb's sampling capabilities.
        // If the name is not get and not insert, or the request is slow, sample it.
        // Otherwise, drop.
        // This filters foyer calls in-process so they won't overwhelm the tracing.
        if (name != "get" && name != "insert") || is_slow(attributes) {
            opentelemetry::trace::SamplingResult {
                decision: opentelemetry::trace::SamplingDecision::RecordAndSample,
                attributes: vec![],
                trace_state: opentelemetry::trace::TraceState::default(),
            }
        } else {
            opentelemetry::trace::SamplingResult {
                decision: opentelemetry::trace::SamplingDecision::Drop,
                attributes: vec![],
                trace_state: opentelemetry::trace::TraceState::default(),
            }
        }
    }
}
pub(crate) fn init_otel_tracing(service_name: &String, otel_endpoint: &String) {
    println!(
        "Registering jaeger subscriber for {} at endpoint {}",
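As a quick illustration of the sampling cutoff above, here is a minimal, hypothetical unit test (not part of this diff) exercising is_slow with the busy_ns/idle_ns attributes that tracing-opentelemetry records on spans; the nanosecond values are made up:

#[cfg(test)]
mod tests {
    use super::is_slow;

    #[test]
    fn slow_when_combined_time_exceeds_one_millisecond() {
        // 600µs busy + 500µs idle = 1.1ms, over the 1ms cutoff.
        let slow = vec![
            opentelemetry::KeyValue::new("busy_ns", 600_000i64),
            opentelemetry::KeyValue::new("idle_ns", 500_000i64),
        ];
        assert!(is_slow(&slow));

        // 400µs busy + 500µs idle = 0.9ms, under the cutoff.
        let fast = vec![
            opentelemetry::KeyValue::new("busy_ns", 400_000i64),
            opentelemetry::KeyValue::new("idle_ns", 500_000i64),
        ];
        assert!(!is_slow(&fast));
    }
}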
@@ -16,7 +66,7 @@ pub(crate) fn init_otel_tracing(service_name: &String, otel_endpoint: &String) {
    )]);
    // Prepare trace config.
    let trace_config = opentelemetry_sdk::trace::Config::default()
-       .with_sampler(opentelemetry_sdk::trace::Sampler::AlwaysOn)
+       .with_sampler(ChromaShouldSample)
        .with_resource(resource);
    // Prepare exporter.
    let exporter = opentelemetry_otlp::new_exporter()
@@ -36,14 +86,29 @@ pub(crate) fn init_otel_tracing(service_name: &String, otel_endpoint: &String) {
    // Layer for printing spans to stdout. Only print INFO logs by default.
    let stdout_layer =
        BunyanFormattingLayer::new(service_name.clone().to_string(), std::io::stdout)
            .with_filter(tracing_subscriber::filter::FilterFn::new(|metadata| {
                // NOTE(rescrv): This is a hack, too. Not an uppercase hack, just a hack. This
                // one's localized to the cache module. There's not much to do to unify it with
                // the otel filter because these are different output layers from the tracing.

                // This filter ensures that we don't log calls to get/insert on stdout, but will
                // still see the clear call.
                !(metadata
                    .module_path()
                    .unwrap_or("")
                    .starts_with("chroma_cache")
                    && metadata.name() != "clear")
            }))
            .with_filter(tracing_subscriber::filter::LevelFilter::INFO);
    // Global filter layer. Don't filter anything above trace at the global layer for chroma,
    // and enable errors for every other library.
    let global_layer = EnvFilter::new(std::env::var("RUST_LOG").unwrap_or_else(|_| {
        "error,".to_string()
            + &vec![
                "chroma",
                "chroma-blockstore",
                "chroma-config",
                "chroma-cache",
                "chroma-distance",
                "chroma-error",
                "chroma-index",
@@ -96,4 +161,13 @@ pub(crate) fn init_otel_tracing(service_name: &String, otel_endpoint: &String) {

        prev_hook(panic_info);
    }));
    let exporter = opentelemetry_otlp::new_exporter()
        .tonic()
        .with_endpoint(otel_endpoint);
    let provider = opentelemetry_otlp::new_pipeline()
        .metrics(opentelemetry_sdk::runtime::Tokio)
        .with_exporter(exporter)
        .build()
        .expect("Failed to build metrics provider");
    global::set_meter_provider(provider);
}

Review thread on `let exporter = opentelemetry_otlp::new_exporter()`:

Comment: For my learning, what is this new exporter and provider for?

Reply: Metrics. They get installed as the exporter and provider for the global metrics. Do you see a way to reuse the other exporter? I'd like that, too.
Comment: Curious why this is measured separately instead of looking at the span duration?