-
Notifications
You must be signed in to change notification settings - Fork 254
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
metrics: add running metrics for grpc #639
base: master
Are you sure you want to change the base?
Changes from 5 commits
a086c63
9997865
df91682
f2ba50f
2de40d2
291d193
8a3c9a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -27,6 +27,8 @@ prost = { version = "0.11", optional = true } | |||||
bytes = { version = "1.0", optional = true } | ||||||
log = "0.4" | ||||||
parking_lot = "0.12" | ||||||
prometheus = { version = "0.13", default-features = false } | ||||||
lazy_static = "1" | ||||||
|
||||||
[workspace] | ||||||
members = [ | ||||||
|
@@ -42,8 +44,9 @@ members = [ | |||||
exclude = ["xtask"] | ||||||
|
||||||
[features] | ||||||
default = ["protobuf-codec", "boringssl"] | ||||||
default = ["protobuf-codec", "boringssl","prometheus"] | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we should not enable the "prometheus" feature by default. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It may increase resource usage. |
||||||
_secure = [] | ||||||
prometheus = [] | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
protobuf-codec = ["protobuf"] | ||||||
protobufv3-codec = ["protobufv3"] | ||||||
prost-codec = ["prost", "bytes"] | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -5,17 +5,113 @@ use std::sync::mpsc; | |||||
use std::sync::Arc; | ||||||
use std::thread::{Builder as ThreadBuilder, JoinHandle}; | ||||||
|
||||||
use crate::grpc_sys; | ||||||
|
||||||
use crate::cq::{CompletionQueue, CompletionQueueHandle, EventType, WorkQueue}; | ||||||
use crate::grpc_sys; | ||||||
use crate::task::CallTag; | ||||||
|
||||||
// event loop | ||||||
#[cfg(feature = "prometheus")] | ||||||
use { | ||||||
crate::metrics::{ | ||||||
GRPC_POOL_CQ_NEXT_DURATION, GRPC_POOL_EVENT_COUNT_VEC, GRPC_POOL_EXECUTE_DURATION, | ||||||
GRPC_TASK_WAIT_DURATION, | ||||||
}, | ||||||
crate::task::resolve, | ||||||
prometheus::{ | ||||||
core::{AtomicU64, GenericCounter}, | ||||||
Histogram, | ||||||
}, | ||||||
std::time::Instant, | ||||||
}; | ||||||
|
||||||
/// Runner of one completion-queue event loop that records Prometheus metrics
/// while polling. Only compiled when the `prometheus` feature is enabled.
#[cfg(feature = "prometheus")] | ||||||
pub struct GRPCRunner { | ||||||
/// Observed with the time spent inside each `cq.next()` call of the loop.
cq_next_duration_his: Histogram, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better use |
||||||
/// Observed with the time spent resolving one completed event and draining
/// the queued work that follows it.
execute_duration_his: Histogram, | ||||||
/// Observed, for `Spawn` tags only, with the time elapsed since the task's
/// recorded push time.
wait_duration_his: Histogram, | ||||||
/// One counter per `CallTag` variant, stored in the order
/// batch, request, unary, abort, action, spawn.
event_counter: [GenericCounter<AtomicU64>; 6], | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
} | ||||||
|
||||||
#[cfg(feature = "prometheus")]
impl GRPCRunner {
    /// Values of the `event` label, in the order the counters are stored in
    /// `event_counter`. The index constants below must stay in sync with it.
    const EVENT_LABELS: [&'static str; 6] =
        ["batch", "request", "unary", "abort", "action", "spawn"];
    const EVENT_BATCH: usize = 0;
    const EVENT_REQUEST: usize = 1;
    const EVENT_UNARY: usize = 2;
    const EVENT_ABORT: usize = 3;
    const EVENT_ACTION: usize = 4;
    const EVENT_SPAWN: usize = 5;

    /// Creates a runner whose metrics are all labelled with `name`.
    ///
    /// `name` is used as the `name` label value of the three histograms and of
    /// every event counter. Taking `&str` keeps existing `&String` callers
    /// working through deref coercion.
    pub fn new(name: &str) -> GRPCRunner {
        let cq_next_duration_his = GRPC_POOL_CQ_NEXT_DURATION.with_label_values(&[name]);
        let execute_duration_his = GRPC_POOL_EXECUTE_DURATION.with_label_values(&[name]);
        let wait_duration_his = GRPC_TASK_WAIT_DURATION.with_label_values(&[name]);
        let event_counter = Self::EVENT_LABELS
            .map(|event| GRPC_POOL_EVENT_COUNT_VEC.with_label_values(&[name, event]));
        GRPCRunner {
            cq_next_duration_his,
            execute_duration_his,
            wait_duration_his,
            event_counter,
        }
    }

    /// Runs the event loop until the completion queue reports shutdown.
    ///
    /// A fresh `CompletionQueue` is created and a clone of it is sent back
    /// through `tx` before polling starts.
    ///
    /// # Panics
    ///
    /// Panics if the completion queue cannot be sent through `tx`.
    pub fn run(&self, tx: mpsc::Sender<CompletionQueue>) {
        let cq = Arc::new(CompletionQueueHandle::new());
        let worker_info = Arc::new(WorkQueue::new());
        let cq = CompletionQueue::new(cq, worker_info);
        tx.send(cq.clone()).expect("send back completion queue");
        loop {
            // Time spent waiting for the next event.
            let poll_start = Instant::now();
            let e = cq.next();
            self.cq_next_duration_his
                .observe(poll_start.elapsed().as_secs_f64());

            // Time spent handling the event; not recorded for timeout/shutdown.
            let execute_start = Instant::now();
            match e.type_ {
                EventType::GRPC_QUEUE_SHUTDOWN => break,
                // timeout should not happen in theory.
                EventType::GRPC_QUEUE_TIMEOUT => continue,
                EventType::GRPC_OP_COMPLETE => {}
            }

            // SAFETY: NOTE(review): presumably `e.tag` is a pointer previously
            // obtained from `Box::<CallTag>::into_raw` and delivered exactly
            // once by the completion queue -- confirm against the tag producers.
            let tag: Box<CallTag> = unsafe { Box::from_raw(e.tag as _) };
            self.resolve(tag, &cq, e.success != 0);
            // SAFETY: NOTE(review): `pop_work` is an unsafe fn whose contract is
            // not visible here; presumably it must only be called from the
            // thread that polls this queue -- confirm.
            while let Some(work) = unsafe { cq.worker.pop_work() } {
                work.finish();
            }
            self.execute_duration_his
                .observe(execute_start.elapsed().as_secs_f64());
        }
    }

    /// Counts the event by tag kind and then resolves the tag.
    ///
    /// For `Spawn` tags the elapsed time since the task's push time is also
    /// observed in the wait-duration histogram.
    fn resolve(&self, tag: Box<CallTag>, cq: &CompletionQueue, success: bool) {
        match *tag {
            CallTag::Batch(prom) => {
                self.event_counter[Self::EVENT_BATCH].inc();
                prom.resolve(success)
            }
            CallTag::Request(cb) => {
                self.event_counter[Self::EVENT_REQUEST].inc();
                cb.resolve(cq, success)
            }
            CallTag::UnaryRequest(cb) => {
                self.event_counter[Self::EVENT_UNARY].inc();
                cb.resolve(cq, success)
            }
            CallTag::Abort(_) => self.event_counter[Self::EVENT_ABORT].inc(),
            CallTag::Action(prom) => {
                self.event_counter[Self::EVENT_ACTION].inc();
                prom.resolve(success)
            }
            CallTag::Spawn(task) => {
                self.event_counter[Self::EVENT_SPAWN].inc();
                self.wait_duration_his
                    .observe(task.reset_push_time().elapsed().as_secs_f64());
                // Free function imported from `crate::task`, not `Self::resolve`.
                resolve(task, success)
            }
        }
    }
}
|
||||||
#[cfg(not(feature = "prometheus"))] | ||||||
fn poll_queue(tx: mpsc::Sender<CompletionQueue>) { | ||||||
let cq = Arc::new(CompletionQueueHandle::new()); | ||||||
let worker_info = Arc::new(WorkQueue::new()); | ||||||
let cq = CompletionQueue::new(cq, worker_info); | ||||||
tx.send(cq.clone()).expect("send back completion queue"); | ||||||
|
||||||
loop { | ||||||
let e = cq.next(); | ||||||
match e.type_ { | ||||||
|
@@ -24,9 +120,7 @@ fn poll_queue(tx: mpsc::Sender<CompletionQueue>) { | |||||
EventType::GRPC_QUEUE_TIMEOUT => continue, | ||||||
EventType::GRPC_OP_COMPLETE => {} | ||||||
} | ||||||
|
||||||
let tag: Box<CallTag> = unsafe { Box::from_raw(e.tag as _) }; | ||||||
|
||||||
tag.resolve(&cq, e.success != 0); | ||||||
while let Some(work) = unsafe { cq.worker.pop_work() } { | ||||||
work.finish(); | ||||||
|
@@ -94,16 +188,23 @@ impl EnvBuilder { | |||||
for i in 0..self.cq_count { | ||||||
let tx_i = tx.clone(); | ||||||
let mut builder = ThreadBuilder::new(); | ||||||
if let Some(ref prefix) = self.name_prefix { | ||||||
builder = builder.name(format!("{prefix}-{i}")); | ||||||
} | ||||||
let name = self | ||||||
.name_prefix | ||||||
.as_ref() | ||||||
.map_or(format!("grpc-pool-{i}"), |prefix| format!("{prefix}-{i}")); | ||||||
#[cfg(feature = "prometheus")] | ||||||
let runner = GRPCRunner::new(&name); | ||||||
builder = builder.name(name); | ||||||
let after_start = self.after_start.clone(); | ||||||
let before_stop = self.before_stop.clone(); | ||||||
let handle = builder | ||||||
.spawn(move || { | ||||||
if let Some(f) = after_start { | ||||||
f(); | ||||||
} | ||||||
#[cfg(feature = "prometheus")] | ||||||
runner.run(tx_i); | ||||||
#[cfg(not(feature = "prometheus"))] | ||||||
poll_queue(tx_i); | ||||||
if let Some(f) = before_stop { | ||||||
f(); | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,6 +40,7 @@ mod env; | |
mod error; | ||
mod log_util; | ||
mod metadata; | ||
mod metrics; | ||
mod quota; | ||
mod security; | ||
mod server; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. | ||
|
||
//! Metrics of the grpc pool. | ||
|
||
use lazy_static::lazy_static; | ||
use prometheus::*; | ||
|
||
lazy_static! { | ||
/// Grpc wait duration of one task. | ||
pub static ref GRPC_TASK_WAIT_DURATION: HistogramVec = register_histogram_vec!( | ||
"grpc_task_wait_duration", | ||
"Bucketed histogram of grpc wait time only for Spawn task", | ||
&["name"], | ||
exponential_buckets(1e-7, 2.0, 20).unwrap() // 100ns ~ 100ms | ||
) | ||
.unwrap(); | ||
|
||
// Grpc pool io handle duration . | ||
pub static ref GRPC_POOL_CQ_NEXT_DURATION: HistogramVec = register_histogram_vec!( | ||
"grpc_pool_cp_next_duration", | ||
"Bucketed histogram of grpc pool wait duration from the completion queue", | ||
&["name"], | ||
exponential_buckets(1e-7, 2.0, 20).unwrap() // 100ns ~ 100ms | ||
) | ||
.unwrap(); | ||
|
||
// Grpc handle execute duration | ||
pub static ref GRPC_POOL_EXECUTE_DURATION: HistogramVec = register_histogram_vec!( | ||
"grpc_pool_execute_duration", | ||
"Bucketed histogram of grpc pool execute duration for every time", | ||
&["name"], | ||
exponential_buckets(1e-7, 2.0, 20).unwrap() // 100ns ~ 100ms | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 100ms upper bound is too small here, maybe set the range to 10us~10s is better here. |
||
) | ||
.unwrap(); | ||
|
||
// Grpc pool event count task . | ||
pub static ref GRPC_POOL_EVENT_COUNT_VEC: IntCounterVec = register_int_counter_vec!( | ||
"grpc_pool_event_task_count", | ||
"Total event task count in grpc pool", | ||
&["name","event"] | ||
) | ||
.unwrap(); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.