/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/metrics/mod.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Metrics for recording information about execution |
19 | | |
20 | | mod baseline; |
21 | | mod builder; |
22 | | mod value; |
23 | | |
24 | | use parking_lot::Mutex; |
25 | | use std::{ |
26 | | borrow::Cow, |
27 | | fmt::{Debug, Display}, |
28 | | sync::Arc, |
29 | | }; |
30 | | |
31 | | use hashbrown::HashMap; |
32 | | |
33 | | // public exports |
34 | | pub use baseline::{BaselineMetrics, RecordOutput}; |
35 | | pub use builder::MetricBuilder; |
36 | | pub use value::{Count, Gauge, MetricValue, ScopedTimerGuard, Time, Timestamp}; |
37 | | |
38 | | /// Something that tracks a value of interest (metric) of a DataFusion |
39 | | /// [`ExecutionPlan`] execution. |
40 | | /// |
41 | | /// Typically [`Metric`]s are not created directly, but instead |
42 | | /// are created using [`MetricBuilder`] or methods on |
43 | | /// [`ExecutionPlanMetricsSet`]. |
44 | | /// |
45 | | /// ``` |
46 | | /// use datafusion_physical_plan::metrics::*; |
47 | | /// |
48 | | /// let metrics = ExecutionPlanMetricsSet::new(); |
49 | | /// assert!(metrics.clone_inner().output_rows().is_none()); |
50 | | /// |
51 | | /// // Create a counter to increment using the MetricBuilder |
52 | | /// let partition = 1; |
53 | | /// let output_rows = MetricBuilder::new(&metrics) |
54 | | /// .output_rows(partition); |
55 | | /// |
56 | | /// // Counter can be incremented |
57 | | /// output_rows.add(13); |
58 | | /// |
59 | | /// // The value can be retrieved directly: |
60 | | /// assert_eq!(output_rows.value(), 13); |
61 | | /// |
62 | | /// // As well as from the metrics set |
63 | | /// assert_eq!(metrics.clone_inner().output_rows(), Some(13)); |
64 | | /// ``` |
65 | | /// |
66 | | /// [`ExecutionPlan`]: super::ExecutionPlan |
67 | | |
68 | | #[derive(Debug)] |
69 | | pub struct Metric { |
70 | | /// The current value of the metric (the value also carries the metric's name) |
71 | | value: MetricValue, |
72 | | |
73 | | /// Arbitrary `name=value` pairs further identifying this metric |
74 | | labels: Vec<Label>, |
75 | | |
76 | | /// The partition of the operator's output that this metric applies |
77 | | /// to. `None` means the metric applies to all partitions. |
78 | | partition: Option<usize>, |
79 | | } |
80 | | |
81 | | impl Display for Metric { |
82 | 4 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
83 | 4 | write!(f, "{}", self.value.name())?0 ; |
84 | | |
85 | 4 | let mut iter = self |
86 | 4 | .partition |
87 | 4 | .iter() |
88 | 4 | .map(|partition| Label::new("partition", partition.to_string())2 ) |
89 | 4 | .chain(self.labels().iter().cloned()) |
90 | 4 | .peekable(); |
91 | 4 | |
92 | 4 | // Print the labels (partition first, if set) as `{k=v, ...}`; the braces are omitted when there are none |
93 | 4 | if iter.peek().is_some() { |
94 | 3 | write!(f, "{{")?0 ; |
95 | | |
96 | 3 | let mut is_first = true; |
97 | 7 | for i4 in iter { |
98 | 4 | if !is_first { |
99 | 1 | write!(f, ", ")?0 ; |
100 | 3 | } else { |
101 | 3 | is_first = false; |
102 | 3 | } |
103 | | |
104 | 4 | write!(f, "{i}")?0 ; |
105 | | } |
106 | | |
107 | 3 | write!(f, "}}")?0 ; |
108 | 1 | } |
109 | | |
110 | | // Finally, print `=` followed by the value |
111 | 4 | write!(f, "={}", self.value) |
112 | 4 | } |
113 | | } |
114 | | |
115 | | impl Metric { |
116 | | /// Create a new [`Metric`] with no labels. Consider using [`MetricBuilder`] |
117 | | /// rather than this function directly. |
118 | 7 | pub fn new(value: MetricValue, partition: Option<usize>) -> Self { |
119 | 7 | Self { |
120 | 7 | value, |
121 | 7 | labels: vec![], |
122 | 7 | partition, |
123 | 7 | } |
124 | 7 | } |
125 | | |
126 | | /// Create a new [`Metric`] with the given labels. Consider using [`MetricBuilder`] |
127 | | /// rather than this function directly. |
128 | 35.8k | pub fn new_with_labels( |
129 | 35.8k | value: MetricValue, |
130 | 35.8k | partition: Option<usize>, |
131 | 35.8k | labels: Vec<Label>, |
132 | 35.8k | ) -> Self { |
133 | 35.8k | Self { |
134 | 35.8k | value, |
135 | 35.8k | labels, |
136 | 35.8k | partition, |
137 | 35.8k | } |
138 | 35.8k | } |
139 | | |
140 | | /// Append `label` to this metric's labels and return the modified metric |
141 | 0 | pub fn with_label(mut self, label: Label) -> Self { |
142 | 0 | self.labels.push(label); |
143 | 0 | self |
144 | 0 | } |
145 | | |
146 | | /// Return the labels attached to this metric |
147 | 6 | pub fn labels(&self) -> &[Label] { |
148 | 6 | &self.labels |
149 | 6 | } |
150 | | |
151 | | /// Return a reference to the value of this metric |
152 | 2.30k | pub fn value(&self) -> &MetricValue { |
153 | 2.30k | &self.value |
154 | 2.30k | } |
155 | | |
156 | | /// Return a mutable reference to the value of this metric |
157 | 10 | pub fn value_mut(&mut self) -> &mut MetricValue { |
158 | 10 | &mut self.value |
159 | 10 | } |
160 | | |
161 | | /// Return the partition this metric applies to, by value (`None` means all partitions) |
162 | 1 | pub fn partition(&self) -> Option<usize> { |
163 | 1 | self.partition |
164 | 1 | } |
165 | | } |
166 | | |
167 | | /// A snapshot of the metrics for a particular [`ExecutionPlan`]. |
168 | | /// |
169 | | /// [`ExecutionPlan`]: super::ExecutionPlan |
170 | | #[derive(Default, Debug, Clone)] |
171 | | pub struct MetricsSet { |
172 | | metrics: Vec<Arc<Metric>>, |
173 | | } |
174 | | |
175 | | impl MetricsSet { |
176 | | /// Create a new, empty container of metrics |
177 | 2.74k | pub fn new() -> Self { |
178 | 2.74k | Default::default() |
179 | 2.74k | } |
180 | | |
181 | | /// Append the specified metric to this set |
182 | 35.8k | pub fn push(&mut self, metric: Arc<Metric>) { |
183 | 35.8k | self.metrics.push(metric) |
184 | 35.8k | } |
185 | | |
186 | | /// Returns an iterator over all metrics in this set, in their stored order |
187 | 7 | pub fn iter(&self) -> impl Iterator<Item = &Arc<Metric>> { |
188 | 7 | self.metrics.iter() |
189 | 7 | } |
190 | | |
191 | | /// Convenience: return the number of rows produced, aggregated |
192 | | /// across partitions, or `None` if no `OutputRows` metric is present |
193 | 15 | pub fn output_rows(&self) -> Option<usize> { |
194 | 63 | self.sum(|metric| matches!(metric.value(), MetricValue::OutputRows(_))) |
195 | 15 | .map(|v| v.as_usize()14 ) |
196 | 15 | } |
197 | | |
198 | | /// Convenience: return the count of spills, aggregated |
199 | | /// across partitions, or `None` if no `SpillCount` metric is present |
200 | 63 | pub fn spill_count(&self) -> Option<usize> { |
201 | 559 | self.sum(|metric| matches!(metric.value(), MetricValue::SpillCount(_))) |
202 | 63 | .map(|v| v.as_usize()62 ) |
203 | 63 | } |
204 | | |
205 | | /// Convenience: return the total byte size of spills, aggregated |
206 | | /// across partitions, or `None` if no `SpilledBytes` metric is present |
207 | 61 | pub fn spilled_bytes(&self) -> Option<usize> { |
208 | 547 | self.sum(|metric| matches!(metric.value(), MetricValue::SpilledBytes(_))) |
209 | 61 | .map(|v| v.as_usize()) |
210 | 61 | } |
211 | | |
212 | | /// Convenience: return the total number of spilled rows, aggregated |
213 | | /// across partitions, or `None` if no `SpilledRows` metric is present |
214 | 61 | pub fn spilled_rows(&self) -> Option<usize> { |
215 | 547 | self.sum(|metric| matches!(metric.value(), MetricValue::SpilledRows(_))) |
216 | 61 | .map(|v| v.as_usize()) |
217 | 61 | } |
218 | | |
219 | | /// Convenience: return the amount of elapsed CPU time spent, aggregated |
220 | | /// across partitions, or `None` if no `ElapsedCompute` metric is present |
221 | 7 | pub fn elapsed_compute(&self) -> Option<usize> { |
222 | 31 | self.sum(|metric| matches!(metric.value(), MetricValue::ElapsedCompute(_))) |
223 | 7 | .map(|v| v.as_usize()6 ) |
224 | 7 | } |
225 | | |
226 | | /// Sums the values of the metrics for which `f(metric)` returns `true`. |
227 | | /// Returns `None` if no metric matches the predicate. Panics if the |
228 | | /// matching metrics are of mismatched kinds (e.g. a count and a time). |
229 | 210 | pub fn sum<F>(&self, mut f: F) -> Option<MetricValue> |
230 | 210 | where |
231 | 210 | F: FnMut(&Metric) -> bool, |
232 | 210 | { |
233 | 210 | let mut iter = self |
234 | 210 | .metrics |
235 | 210 | .iter() |
236 | 1.75k | .filter(|metric| f(metric.as_ref())) |
237 | 210 | .peekable(); |
238 | | |
239 | 210 | let mut accum206 = match iter.peek() { |
240 | | None => { |
241 | 4 | return None; |
242 | | } |
243 | 206 | Some(metric) => metric.value().new_empty(), |
244 | 206 | }; |
245 | 206 | |
246 | 210 | iter.for_each(|metric| accum.aggregate(metric.value())); |
247 | 206 | |
248 | 206 | Some(accum) |
249 | 210 | } |
250 | | |
251 | | /// Returns the sum of all user-named metrics (`Count`, `Time` or `Gauge`) |
252 | | /// with the specified name, or `None` if this set contains no such metric. |
253 | 0 | pub fn sum_by_name(&self, metric_name: &str) -> Option<MetricValue> { |
254 | 0 | self.sum(|m| match m.value() { |
255 | 0 | MetricValue::Count { name, .. } => name == metric_name, |
256 | 0 | MetricValue::Time { name, .. } => name == metric_name, |
257 | 0 | MetricValue::OutputRows(_) => false, |
258 | 0 | MetricValue::ElapsedCompute(_) => false, |
259 | 0 | MetricValue::SpillCount(_) => false, |
260 | 0 | MetricValue::SpilledBytes(_) => false, |
261 | 0 | MetricValue::SpilledRows(_) => false, |
262 | 0 | MetricValue::CurrentMemoryUsage(_) => false, |
263 | 0 | MetricValue::Gauge { name, .. } => name == metric_name, |
264 | 0 | MetricValue::StartTimestamp(_) => false, |
265 | 0 | MetricValue::EndTimestamp(_) => false, |
266 | 0 | }) |
267 | 0 | } |
268 | | |
269 | | /// Returns a new derived `MetricsSet` where all metrics |
270 | | /// that had the same name have been aggregated together. |
271 | | /// Every metric in the resulting `MetricsSet` has |
272 | | /// `partition=None` and no labels. |
273 | 3 | pub fn aggregate_by_name(&self) -> Self { |
274 | 3 | let mut map = HashMap::new(); |
275 | | |
276 | | // There are all sorts of ways to make this more efficient |
277 | 13 | for metric10 in &self.metrics { |
278 | 10 | let key = metric.value.name(); |
279 | 10 | map.entry(key) |
280 | 10 | .and_modify(|accum: &mut Metric| { |
281 | 5 | accum.value_mut().aggregate(metric.value()); |
282 | 10 | }) |
283 | 10 | .or_insert_with(|| { |
284 | 5 | // first metric with this name: start an empty accumulator with no partition |
285 | 5 | let partition = None; |
286 | 5 | let mut accum = Metric::new(metric.value().new_empty(), partition); |
287 | 5 | accum.value_mut().aggregate(metric.value()); |
288 | 5 | accum |
289 | 10 | }); |
290 | 10 | } |
291 | | |
292 | 3 | let new_metrics = map |
293 | 3 | .into_iter() |
294 | 4 | .map(|(_k, v)| Arc::new(v)) |
295 | 3 | .collect::<Vec<_>>(); |
296 | 3 | |
297 | 3 | Self { |
298 | 3 | metrics: new_metrics, |
299 | 3 | } |
300 | 3 | } |
301 | | |
302 | | /// Sort the metrics by `display_sort_key()` and then by name, so the "most useful" show up first |
303 | 1 | pub fn sorted_for_display(mut self) -> Self { |
304 | 46 | self.metrics.sort_unstable_by_key(|metric| { |
305 | 46 | ( |
306 | 46 | metric.value().display_sort_key(), |
307 | 46 | metric.value().name().to_owned(), |
308 | 46 | ) |
309 | 46 | }); |
310 | 1 | self |
311 | 1 | } |
312 | | |
313 | | /// Return this set with all timestamp metrics removed (for more compact display) |
314 | 0 | pub fn timestamps_removed(self) -> Self { |
315 | 0 | let Self { metrics } = self; |
316 | 0 |
|
317 | 0 | let metrics = metrics |
318 | 0 | .into_iter() |
319 | 0 | .filter(|m| !m.value.is_timestamp()) |
320 | 0 | .collect::<Vec<_>>(); |
321 | 0 |
|
322 | 0 | Self { metrics } |
323 | 0 | } |
324 | | } |
325 | | |
326 | | impl Display for MetricsSet { |
327 | | /// Format the [`MetricsSet`] as a single string: each metric is written with its own `Display` impl, separated by `, ` |
328 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
329 | 0 | let mut is_first = true; |
330 | 0 | for i in self.metrics.iter() { |
331 | 0 | if !is_first { |
332 | 0 | write!(f, ", ")?; |
333 | 0 | } else { |
334 | 0 | is_first = false; |
335 | 0 | } |
336 | | |
337 | 0 | write!(f, "{i}")?; |
338 | | } |
339 | 0 | Ok(()) |
340 | 0 | } |
341 | | } |
342 | | |
343 | | /// A set of [`Metric`]s for an individual "operator" (e.g. `&dyn |
344 | | /// ExecutionPlan`). |
345 | | /// |
346 | | /// This structure is intended as a convenience for [`ExecutionPlan`] |
347 | | /// implementations so they can generate different streams for multiple |
348 | | /// partitions but easily report them together. |
349 | | /// |
350 | | /// Each `clone()` of this structure shares, and adds metrics to, the same |
351 | | /// underlying metrics set (it is held behind an `Arc<Mutex<_>>`). |
352 | | /// |
353 | | /// [`ExecutionPlan`]: super::ExecutionPlan |
354 | | #[derive(Default, Debug, Clone)] |
355 | | pub struct ExecutionPlanMetricsSet { |
356 | | inner: Arc<Mutex<MetricsSet>>, |
357 | | } |
358 | | |
359 | | impl ExecutionPlanMetricsSet { |
360 | | /// Create a new, empty shared metrics set |
361 | 2.74k | pub fn new() -> Self { |
362 | 2.74k | Self { |
363 | 2.74k | inner: Arc::new(Mutex::new(MetricsSet::new())), |
364 | 2.74k | } |
365 | 2.74k | } |
366 | | |
367 | | /// Add the specified metric to the underlying (shared) metric set |
368 | 35.8k | pub fn register(&self, metric: Arc<Metric>) { |
369 | 35.8k | self.inner.lock().push(metric) |
370 | 35.8k | } |
371 | | |
372 | | /// Return a snapshot of the inner [`MetricsSet`], cloned while the lock is held |
373 | 265 | pub fn clone_inner(&self) -> MetricsSet { |
374 | 265 | let guard = self.inner.lock(); |
375 | 265 | (*guard).clone() |
376 | 265 | } |
377 | | } |
378 | | |
379 | | /// `name=value` pairs identifying a metric. This concept is called various things |
380 | | /// in various different systems: |
381 | | /// |
382 | | /// "labels" in |
383 | | /// [Prometheus](https://prometheus.io/docs/concepts/data_model/) and |
384 | | /// "tags" in |
385 | | /// [InfluxDB](https://docs.influxdata.com/influxdb/v1.8/write_protocols/line_protocol_tutorial/), |
386 | | /// "attributes" in |
387 | | /// [OpenTelemetry](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/datamodel.md), |
388 | | /// etc. |
389 | | /// |
390 | | /// As the name and value are expected to mostly be constant strings, |
391 | | /// use a [`Cow`] to avoid copying / allocations in this common case. |
392 | | #[derive(Debug, Clone, PartialEq, Eq, Hash)] |
393 | | pub struct Label { |
394 | | name: Cow<'static, str>, |
395 | | value: Cow<'static, str>, |
396 | | } |
397 | | |
398 | | impl Label { |
399 | | /// Create a new [`Label`] from anything convertible into a `Cow<'static, str>` |
400 | 5.60k | pub fn new( |
401 | 5.60k | name: impl Into<Cow<'static, str>>, |
402 | 5.60k | value: impl Into<Cow<'static, str>>, |
403 | 5.60k | ) -> Self { |
404 | 5.60k | let name = name.into(); |
405 | 5.60k | let value = value.into(); |
406 | 5.60k | Self { name, value } |
407 | 5.60k | } |
408 | | |
409 | | /// Returns the name of this label |
410 | 0 | pub fn name(&self) -> &str { |
411 | 0 | self.name.as_ref() |
412 | 0 | } |
413 | | |
414 | | /// Returns the value of this label |
415 | 0 | pub fn value(&self) -> &str { |
416 | 0 | self.value.as_ref() |
417 | 0 | } |
418 | | } |
419 | | |
420 | | impl Display for Label { |
421 | 4 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
422 | 4 | write!(f, "{}={}", self.name, self.value) |
423 | 4 | } |
424 | | } |
425 | | |
426 | | #[cfg(test)] |
427 | | mod tests { |
428 | | use std::time::Duration; |
429 | | |
430 | | use chrono::{TimeZone, Utc}; |
431 | | |
432 | | use super::*; |
433 | | |
434 | | #[test] |
435 | 1 | fn test_display_no_labels_no_partition() { |
436 | 1 | let count = Count::new(); |
437 | 1 | count.add(33); |
438 | 1 | let value = MetricValue::OutputRows(count); |
439 | 1 | let partition = None; |
440 | 1 | let metric = Metric::new(value, partition); |
441 | 1 | |
442 | 1 | assert_eq!("output_rows=33", metric.to_string()) |
443 | 1 | } |
444 | | |
445 | | #[test] |
446 | 1 | fn test_display_no_labels_with_partition() { |
447 | 1 | let count = Count::new(); |
448 | 1 | count.add(44); |
449 | 1 | let value = MetricValue::OutputRows(count); |
450 | 1 | let partition = Some(1); |
451 | 1 | let metric = Metric::new(value, partition); |
452 | 1 | |
453 | 1 | assert_eq!("output_rows{partition=1}=44", metric.to_string()) |
454 | 1 | } |
455 | | |
456 | | #[test] |
457 | 1 | fn test_display_labels_no_partition() { |
458 | 1 | let count = Count::new(); |
459 | 1 | count.add(55); |
460 | 1 | let value = MetricValue::OutputRows(count); |
461 | 1 | let partition = None; |
462 | 1 | let label = Label::new("foo", "bar"); |
463 | 1 | let metric = Metric::new_with_labels(value, partition, vec![label]); |
464 | 1 | |
465 | 1 | assert_eq!("output_rows{foo=bar}=55", metric.to_string()) |
466 | 1 | } |
467 | | |
468 | | #[test] |
469 | 1 | fn test_display_labels_and_partition() { |
470 | 1 | let count = Count::new(); |
471 | 1 | count.add(66); |
472 | 1 | let value = MetricValue::OutputRows(count); |
473 | 1 | let partition = Some(2); |
474 | 1 | let label = Label::new("foo", "bar"); |
475 | 1 | let metric = Metric::new_with_labels(value, partition, vec![label]); |
476 | 1 | |
477 | 1 | assert_eq!("output_rows{partition=2, foo=bar}=66", metric.to_string()) |
478 | 1 | } |
479 | | |
480 | | #[test] |
481 | 1 | fn test_output_rows() { |
482 | 1 | let metrics = ExecutionPlanMetricsSet::new(); |
483 | 1 | assert!(metrics.clone_inner().output_rows().is_none()); |
484 | | |
485 | 1 | let partition = 1; |
486 | 1 | let output_rows = MetricBuilder::new(&metrics).output_rows(partition); |
487 | 1 | output_rows.add(13); |
488 | 1 | |
489 | 1 | let output_rows = MetricBuilder::new(&metrics).output_rows(partition + 1); |
490 | 1 | output_rows.add(7); |
491 | 1 | assert_eq!(metrics.clone_inner().output_rows().unwrap(), 20); |
492 | 1 | } |
493 | | |
494 | | #[test] |
495 | 1 | fn test_elapsed_compute() { |
496 | 1 | let metrics = ExecutionPlanMetricsSet::new(); |
497 | 1 | assert!(metrics.clone_inner().elapsed_compute().is_none()); |
498 | | |
499 | 1 | let partition = 1; |
500 | 1 | let elapsed_compute = MetricBuilder::new(&metrics).elapsed_compute(partition); |
501 | 1 | elapsed_compute.add_duration(Duration::from_nanos(1234)); |
502 | 1 | |
503 | 1 | let elapsed_compute = MetricBuilder::new(&metrics).elapsed_compute(partition + 1); |
504 | 1 | elapsed_compute.add_duration(Duration::from_nanos(6)); |
505 | 1 | assert_eq!(metrics.clone_inner().elapsed_compute().unwrap(), 1240); |
506 | 1 | } |
507 | | |
508 | | #[test] |
509 | 1 | fn test_sum() { |
510 | 1 | let metrics = ExecutionPlanMetricsSet::new(); |
511 | 1 | |
512 | 1 | let count1 = MetricBuilder::new(&metrics) |
513 | 1 | .with_new_label("foo", "bar") |
514 | 1 | .counter("my_counter", 1); |
515 | 1 | count1.add(1); |
516 | 1 | |
517 | 1 | let count2 = MetricBuilder::new(&metrics).counter("my_counter", 2); |
518 | 1 | count2.add(2); |
519 | 1 | |
520 | 1 | let metrics = metrics.clone_inner(); |
521 | 2 | assert!(metrics.sum(|_| false).is_none())1 ; |
522 | | |
523 | 1 | let expected_count = Count::new(); |
524 | 1 | expected_count.add(3); |
525 | 1 | let expected_sum = MetricValue::Count { |
526 | 1 | name: "my_counter".into(), |
527 | 1 | count: expected_count, |
528 | 1 | }; |
529 | 1 | |
530 | 2 | assert_eq!(metrics.sum(|_| true), Some(expected_sum))1 ; |
531 | 1 | } |
532 | | |
533 | | #[test] |
534 | | #[should_panic(expected = "Mismatched metric types. Can not aggregate Count")] |
535 | 1 | fn test_bad_sum() { |
536 | 1 | // can not add different kinds of metrics |
537 | 1 | let metrics = ExecutionPlanMetricsSet::new(); |
538 | 1 | |
539 | 1 | let count = MetricBuilder::new(&metrics).counter("my_metric", 1); |
540 | 1 | count.add(1); |
541 | 1 | |
542 | 1 | let time = MetricBuilder::new(&metrics).subset_time("my_metric", 1); |
543 | 1 | time.add_duration(Duration::from_nanos(10)); |
544 | 1 | |
545 | 1 | // summing a count and a time is expected to panic (see `should_panic` above) |
546 | 2 | metrics.clone_inner().sum(|_| true); |
547 | 1 | } |
548 | | |
549 | | #[test] |
550 | 1 | fn test_aggregate_by_name() { |
551 | 1 | let metrics = ExecutionPlanMetricsSet::new(); |
552 | 1 | |
553 | 1 | // Note elapsed_compute1 has labels but it is still aggregated with elapsed_compute2 and elapsed_compute3 |
554 | 1 | let elapsed_compute1 = MetricBuilder::new(&metrics) |
555 | 1 | .with_new_label("foo", "bar") |
556 | 1 | .elapsed_compute(1); |
557 | 1 | elapsed_compute1.add_duration(Duration::from_nanos(12)); |
558 | 1 | |
559 | 1 | let elapsed_compute2 = MetricBuilder::new(&metrics).elapsed_compute(2); |
560 | 1 | elapsed_compute2.add_duration(Duration::from_nanos(34)); |
561 | 1 | |
562 | 1 | let elapsed_compute3 = MetricBuilder::new(&metrics).elapsed_compute(4); |
563 | 1 | elapsed_compute3.add_duration(Duration::from_nanos(56)); |
564 | 1 | |
565 | 1 | let output_rows = MetricBuilder::new(&metrics).output_rows(1); // output rows |
566 | 1 | output_rows.add(56); |
567 | 1 | |
568 | 1 | let aggregated = metrics.clone_inner().aggregate_by_name(); |
569 | 1 | |
570 | 1 | // elapsed compute time should be aggregated into a single metric: |
571 | 1 | let elapsed_computes = aggregated |
572 | 1 | .iter() |
573 | 2 | .filter(|metric| matches!(metric.value(), MetricValue::ElapsedCompute(_))) |
574 | 1 | .collect::<Vec<_>>(); |
575 | 1 | assert_eq!(elapsed_computes.len(), 1); |
576 | 1 | assert_eq!(elapsed_computes[0].value().as_usize(), 12 + 34 + 56); |
577 | 1 | assert!(elapsed_computes[0].partition().is_none()); |
578 | | |
579 | | // output rows should be reported as a single metric with no partition |
580 | 1 | let output_rows = aggregated |
581 | 1 | .iter() |
582 | 2 | .filter(|metric| matches!(metric.value(), MetricValue::OutputRows(_))) |
583 | 1 | .collect::<Vec<_>>(); |
584 | 1 | assert_eq!(output_rows.len(), 1); |
585 | 1 | assert_eq!(output_rows[0].value().as_usize(), 56); |
586 | 1 | assert!(output_rows[0].partition.is_none()) |
587 | 1 | } |
588 | | |
589 | | #[test] |
590 | | #[should_panic(expected = "Mismatched metric types. Can not aggregate Count")] |
591 | 1 | fn test_aggregate_partition_bad_sum() { |
592 | 1 | let metrics = ExecutionPlanMetricsSet::new(); |
593 | 1 | |
594 | 1 | let count = MetricBuilder::new(&metrics).counter("my_metric", 1); |
595 | 1 | count.add(1); |
596 | 1 | |
597 | 1 | let time = MetricBuilder::new(&metrics).subset_time("my_metric", 1); |
598 | 1 | time.add_duration(Duration::from_nanos(10)); |
599 | 1 | |
600 | 1 | // can't aggregate time and count -- expect a panic |
601 | 1 | metrics.clone_inner().aggregate_by_name(); |
602 | 1 | } |
603 | | |
604 | | #[test] |
605 | 1 | fn test_aggregate_partition_timestamps() { |
606 | 1 | let metrics = ExecutionPlanMetricsSet::new(); |
607 | 1 | |
608 | 1 | // 1431648000000000 == 1970-01-17 13:40:48 UTC |
609 | 1 | let t1 = Utc.timestamp_nanos(1431648000000000); |
610 | 1 | // 1531648000000000 == 1970-01-18 17:27:28 UTC |
611 | 1 | let t2 = Utc.timestamp_nanos(1531648000000000); |
612 | 1 | // 1631648000000000 == 1970-01-19 21:14:08 UTC |
613 | 1 | let t3 = Utc.timestamp_nanos(1631648000000000); |
614 | 1 | // 1731648000000000 == 1970-01-21 01:00:48 UTC |
615 | 1 | let t4 = Utc.timestamp_nanos(1731648000000000); |
616 | 1 | |
617 | 1 | let start_timestamp0 = MetricBuilder::new(&metrics).start_timestamp(0); |
618 | 1 | start_timestamp0.set(t1); |
619 | 1 | let end_timestamp0 = MetricBuilder::new(&metrics).end_timestamp(0); |
620 | 1 | end_timestamp0.set(t2); |
621 | 1 | let start_timestamp1 = MetricBuilder::new(&metrics).start_timestamp(0); |
622 | 1 | start_timestamp1.set(t3); |
623 | 1 | let end_timestamp1 = MetricBuilder::new(&metrics).end_timestamp(0); |
624 | 1 | end_timestamp1.set(t4); |
625 | 1 | |
626 | 1 | // aggregate |
627 | 1 | let aggregated = metrics.clone_inner().aggregate_by_name(); |
628 | 1 | |
629 | 1 | let mut ts = aggregated |
630 | 1 | .iter() |
631 | 2 | .filter(|metric| { |
632 | 2 | matches!1 (metric.value(), MetricValue::StartTimestamp(_)) |
633 | 1 | && metric.labels().is_empty() |
634 | 2 | }) |
635 | 1 | .collect::<Vec<_>>(); |
636 | 1 | assert_eq!(ts.len(), 1); |
637 | 1 | match ts.remove(0).value() { |
638 | 1 | MetricValue::StartTimestamp(ts) => { |
639 | 1 | // expect the earliest of the start timestamps t1, t3 |
640 | 1 | assert_eq!(ts.value(), Some(t1)); |
641 | | } |
642 | | _ => { |
643 | 0 | panic!("Not a timestamp"); |
644 | | } |
645 | | }; |
646 | | |
647 | 1 | let mut ts = aggregated |
648 | 1 | .iter() |
649 | 2 | .filter(|metric| { |
650 | 2 | matches!1 (metric.value(), MetricValue::EndTimestamp(_)) |
651 | 1 | && metric.labels().is_empty() |
652 | 2 | }) |
653 | 1 | .collect::<Vec<_>>(); |
654 | 1 | assert_eq!(ts.len(), 1); |
655 | 1 | match ts.remove(0).value() { |
656 | 1 | MetricValue::EndTimestamp(ts) => { |
657 | 1 | // expect the latest of the end timestamps t2, t4 |
658 | 1 | assert_eq!(ts.value(), Some(t4)); |
659 | | } |
660 | | _ => { |
661 | 0 | panic!("Not a timestamp"); |
662 | | } |
663 | | }; |
664 | 1 | } |
665 | | |
666 | | #[test] |
667 | 1 | fn test_sorted_for_display() { |
668 | 1 | let metrics = ExecutionPlanMetricsSet::new(); |
669 | 1 | MetricBuilder::new(&metrics).end_timestamp(0); |
670 | 1 | MetricBuilder::new(&metrics).start_timestamp(0); |
671 | 1 | MetricBuilder::new(&metrics).elapsed_compute(0); |
672 | 1 | MetricBuilder::new(&metrics).counter("the_second_counter", 0); |
673 | 1 | MetricBuilder::new(&metrics).counter("the_counter", 0); |
674 | 1 | MetricBuilder::new(&metrics).counter("the_third_counter", 0); |
675 | 1 | MetricBuilder::new(&metrics).subset_time("the_time", 0); |
676 | 1 | MetricBuilder::new(&metrics).output_rows(0); |
677 | 1 | let metrics = metrics.clone_inner(); |
678 | | |
679 | 2 | fn metric_names(metrics: &MetricsSet) -> String { |
680 | 16 | let n = metrics.iter().map(|m| m.value().name()).collect::<Vec<_>>(); |
681 | 2 | n.join(", ") |
682 | 2 | } |
683 | | |
684 | 1 | assert_eq!("end_timestamp, start_timestamp, elapsed_compute, the_second_counter, the_counter, the_third_counter, the_time, output_rows", metric_names(&metrics)); |
685 | | |
686 | 1 | let metrics = metrics.sorted_for_display(); |
687 | 1 | assert_eq!("output_rows, elapsed_compute, the_counter, the_second_counter, the_third_counter, the_time, start_timestamp, end_timestamp", metric_names(&metrics)); |
688 | 1 | } |
689 | | } |