Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/display.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Implementation of physical plan display. See
19
//! [`crate::displayable`] for examples of how to format plans.
20
21
use std::fmt;
22
use std::fmt::Formatter;
23
24
use arrow_schema::SchemaRef;
25
26
use datafusion_common::display::{GraphvizBuilder, PlanType, StringifiedPlan};
27
use datafusion_expr::display_schema;
28
use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr};
29
30
use super::{accept, ExecutionPlan, ExecutionPlanVisitor};
31
32
/// Options for controlling how each [`ExecutionPlan`] should format itself
33
#[derive(Debug, Clone, Copy)]
34
pub enum DisplayFormatType {
35
    /// Default, compact format. Example: `FilterExec: c12 < 10.0`
36
    Default,
37
    /// Verbose, showing all available details
38
    Verbose,
39
}
40
41
/// Wraps an `ExecutionPlan` with various ways to display this plan
42
#[derive(Debug, Clone)]
43
pub struct DisplayableExecutionPlan<'a> {
44
    inner: &'a dyn ExecutionPlan,
45
    /// How to show metrics
46
    show_metrics: ShowMetrics,
47
    /// If statistics should be displayed
48
    show_statistics: bool,
49
    /// If schema should be displayed. See [`Self::set_show_schema`]
50
    show_schema: bool,
51
}
52
53
impl<'a> DisplayableExecutionPlan<'a> {
54
    /// Create a wrapper around an [`ExecutionPlan`] which can be
55
    /// pretty printed in a variety of ways
56
11
    pub fn new(inner: &'a dyn ExecutionPlan) -> Self {
57
11
        Self {
58
11
            inner,
59
11
            show_metrics: ShowMetrics::None,
60
11
            show_statistics: false,
61
11
            show_schema: false,
62
11
        }
63
11
    }
64
65
    /// Create a wrapper around an [`ExecutionPlan`] which can be
66
    /// pretty printed in a variety of ways that also shows aggregated
67
    /// metrics
68
0
    pub fn with_metrics(inner: &'a dyn ExecutionPlan) -> Self {
69
0
        Self {
70
0
            inner,
71
0
            show_metrics: ShowMetrics::Aggregated,
72
0
            show_statistics: false,
73
0
            show_schema: false,
74
0
        }
75
0
    }
76
77
    /// Create a wrapper around an [`ExecutionPlan`] which can be
78
    /// pretty printed in a variety of ways that also shows all low
79
    /// level metrics
80
0
    pub fn with_full_metrics(inner: &'a dyn ExecutionPlan) -> Self {
81
0
        Self {
82
0
            inner,
83
0
            show_metrics: ShowMetrics::Full,
84
0
            show_statistics: false,
85
0
            show_schema: false,
86
0
        }
87
0
    }
88
89
    /// Enable display of schema
90
    ///
91
    /// If true, plans will be displayed with schema information at the end
92
    /// of each line. The format is `schema=[[a:Int32;N, b:Int32;N, c:Int32;N]]`
93
0
    pub fn set_show_schema(mut self, show_schema: bool) -> Self {
94
0
        self.show_schema = show_schema;
95
0
        self
96
0
    }
97
98
    /// Enable display of statistics
99
6
    pub fn set_show_statistics(mut self, show_statistics: bool) -> Self {
100
6
        self.show_statistics = show_statistics;
101
6
        self
102
6
    }
103
104
    /// Return a `format`able structure that produces a single line
105
    /// per node.
106
    ///
107
    /// ```text
108
    /// ProjectionExec: expr=[a]
109
    ///   CoalesceBatchesExec: target_batch_size=8192
110
    ///     FilterExec: a < 5
111
    ///       RepartitionExec: partitioning=RoundRobinBatch(16)
112
    ///         CsvExec: source=...
113
    /// ```
114
5
    pub fn indent(&self, verbose: bool) -> impl fmt::Display + 'a {
115
5
        let format_type = if verbose {
116
5
            DisplayFormatType::Verbose
117
        } else {
118
0
            DisplayFormatType::Default
119
        };
120
        struct Wrapper<'a> {
121
            format_type: DisplayFormatType,
122
            plan: &'a dyn ExecutionPlan,
123
            show_metrics: ShowMetrics,
124
            show_statistics: bool,
125
            show_schema: bool,
126
        }
127
        impl<'a> fmt::Display for Wrapper<'a> {
128
5
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
129
5
                let mut visitor = IndentVisitor {
130
5
                    t: self.format_type,
131
5
                    f,
132
5
                    indent: 0,
133
5
                    show_metrics: self.show_metrics,
134
5
                    show_statistics: self.show_statistics,
135
5
                    show_schema: self.show_schema,
136
5
                };
137
5
                accept(self.plan, &mut visitor)
138
5
            }
139
        }
140
5
        Wrapper {
141
5
            format_type,
142
5
            plan: self.inner,
143
5
            show_metrics: self.show_metrics,
144
5
            show_statistics: self.show_statistics,
145
5
            show_schema: self.show_schema,
146
5
        }
147
5
    }
148
149
    /// Returns a `format`able structure that produces graphviz format for execution plan, which can
150
    /// be directly visualized [here](https://dreampuf.github.io/GraphvizOnline).
151
    ///
152
    /// An example is
153
    /// ```dot
154
    /// strict digraph dot_plan {
155
    ///     0[label="ProjectionExec: expr=[id@0 + 2 as employee.id + Int32(2)]",tooltip=""]
156
    ///     1[label="EmptyExec",tooltip=""]
157
    ///     0 -> 1
158
    /// }
159
    /// ```
160
0
    pub fn graphviz(&self) -> impl fmt::Display + 'a {
161
        struct Wrapper<'a> {
162
            plan: &'a dyn ExecutionPlan,
163
            show_metrics: ShowMetrics,
164
            show_statistics: bool,
165
        }
166
        impl<'a> fmt::Display for Wrapper<'a> {
167
0
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
168
0
                let t = DisplayFormatType::Default;
169
0
170
0
                let mut visitor = GraphvizVisitor {
171
0
                    f,
172
0
                    t,
173
0
                    show_metrics: self.show_metrics,
174
0
                    show_statistics: self.show_statistics,
175
0
                    graphviz_builder: GraphvizBuilder::default(),
176
0
                    parents: Vec::new(),
177
0
                };
178
0
179
0
                visitor.start_graph()?;
180
181
0
                accept(self.plan, &mut visitor)?;
182
183
0
                visitor.end_graph()?;
184
0
                Ok(())
185
0
            }
186
        }
187
188
0
        Wrapper {
189
0
            plan: self.inner,
190
0
            show_metrics: self.show_metrics,
191
0
            show_statistics: self.show_statistics,
192
0
        }
193
0
    }
194
195
    /// Return a single-line summary of the root of the plan
196
    /// Example: `ProjectionExec: expr=[a@0 as a]`.
197
6
    pub fn one_line(&self) -> impl fmt::Display + 'a {
198
        struct Wrapper<'a> {
199
            plan: &'a dyn ExecutionPlan,
200
            show_metrics: ShowMetrics,
201
            show_statistics: bool,
202
            show_schema: bool,
203
        }
204
205
        impl<'a> fmt::Display for Wrapper<'a> {
206
6
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
207
6
                let mut visitor = IndentVisitor {
208
6
                    f,
209
6
                    t: DisplayFormatType::Default,
210
6
                    indent: 0,
211
6
                    show_metrics: self.show_metrics,
212
6
                    show_statistics: self.show_statistics,
213
6
                    show_schema: self.show_schema,
214
6
                };
215
6
                visitor.pre_visit(self.plan)
?1
;
216
5
                Ok(())
217
6
            }
218
        }
219
220
6
        Wrapper {
221
6
            plan: self.inner,
222
6
            show_metrics: self.show_metrics,
223
6
            show_statistics: self.show_statistics,
224
6
            show_schema: self.show_schema,
225
6
        }
226
6
    }
227
228
    /// Format the plan as a `StringifiedPlan` of the given `plan_type`, using [`Self::indent`]
229
0
    pub fn to_stringified(&self, verbose: bool, plan_type: PlanType) -> StringifiedPlan {
230
0
        StringifiedPlan::new(plan_type, self.indent(verbose).to_string())
231
0
    }
232
}
233
234
#[derive(Debug, Clone, Copy)]
235
enum ShowMetrics {
236
    /// Do not show any metrics
237
    None,
238
239
    /// Show aggregated metrics across partitions
240
    Aggregated,
241
242
    /// Show full per-partition metrics
243
    Full,
244
}
245
246
/// Formats plans with a single line per node.
247
///
248
/// # Example
249
///
250
/// ```text
251
/// ProjectionExec: expr=[column1@0 + 2 as column1 + Int64(2)]
252
///   FilterExec: column1@0 = 5
253
///     ValuesExec
254
/// ```
255
struct IndentVisitor<'a, 'b> {
256
    /// How to format each node
257
    t: DisplayFormatType,
258
    /// Write to this formatter
259
    f: &'a mut fmt::Formatter<'b>,
260
    /// Indent size
261
    indent: usize,
262
    /// How to show metrics
263
    show_metrics: ShowMetrics,
264
    /// If statistics should be displayed
265
    show_statistics: bool,
266
    /// If schema should be displayed
267
    show_schema: bool,
268
}
269
270
impl<'a, 'b> ExecutionPlanVisitor for IndentVisitor<'a, 'b> {
271
    type Error = fmt::Error;
272
21
    fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result<bool, Self::Error> {
273
21
        write!(self.f, "{:indent$}", "", indent = self.indent * 2)
?0
;
274
21
        plan.fmt_as(self.t, self.f)
?0
;
275
21
        match self.show_metrics {
276
21
            ShowMetrics::None => {}
277
            ShowMetrics::Aggregated => {
278
0
                if let Some(metrics) = plan.metrics() {
279
0
                    let metrics = metrics
280
0
                        .aggregate_by_name()
281
0
                        .sorted_for_display()
282
0
                        .timestamps_removed();
283
0
284
0
                    write!(self.f, ", metrics=[{metrics}]")?;
285
                } else {
286
0
                    write!(self.f, ", metrics=[]")?;
287
                }
288
            }
289
            ShowMetrics::Full => {
290
0
                if let Some(metrics) = plan.metrics() {
291
0
                    write!(self.f, ", metrics=[{metrics}]")?;
292
                } else {
293
0
                    write!(self.f, ", metrics=[]")?;
294
                }
295
            }
296
        }
297
21
        if self.show_statistics {
298
2
            let 
stats1
= plan.statistics().map_err(|_e|
fmt::Error1
)
?1
;
299
1
            write!(self.f, ", statistics=[{}]", stats)
?0
;
300
19
        }
301
19
        if self.show_schema {
302
0
            write!(
303
0
                self.f,
304
0
                ", schema={}",
305
0
                display_schema(plan.schema().as_ref())
306
0
            )?;
307
19
        }
308
19
        writeln!(self.f)
?0
;
309
19
        self.indent += 1;
310
19
        Ok(true)
311
20
    }
312
313
15
    fn post_visit(&mut self, _plan: &dyn ExecutionPlan) -> Result<bool, Self::Error> {
314
15
        self.indent -= 1;
315
15
        Ok(true)
316
15
    }
317
}
318
319
struct GraphvizVisitor<'a, 'b> {
320
    f: &'a mut fmt::Formatter<'b>,
321
    /// How to format each node
322
    t: DisplayFormatType,
323
    /// How to show metrics
324
    show_metrics: ShowMetrics,
325
    /// If statistics should be displayed
326
    show_statistics: bool,
327
328
    graphviz_builder: GraphvizBuilder,
329
    /// Used to record parent node ids when visiting a plan.
330
    parents: Vec<usize>,
331
}
332
333
impl GraphvizVisitor<'_, '_> {
334
0
    fn start_graph(&mut self) -> fmt::Result {
335
0
        self.graphviz_builder.start_graph(self.f)
336
0
    }
337
338
0
    fn end_graph(&mut self) -> fmt::Result {
339
0
        self.graphviz_builder.end_graph(self.f)
340
0
    }
341
}
342
343
impl ExecutionPlanVisitor for GraphvizVisitor<'_, '_> {
344
    type Error = fmt::Error;
345
346
0
    fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result<bool, Self::Error> {
347
0
        let id = self.graphviz_builder.next_id();
348
349
        struct Wrapper<'a>(&'a dyn ExecutionPlan, DisplayFormatType);
350
351
        impl<'a> std::fmt::Display for Wrapper<'a> {
352
0
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
353
0
                self.0.fmt_as(self.1, f)
354
0
            }
355
        }
356
357
0
        let label = { format!("{}", Wrapper(plan, self.t)) };
358
359
0
        let metrics = match self.show_metrics {
360
0
            ShowMetrics::None => "".to_string(),
361
            ShowMetrics::Aggregated => {
362
0
                if let Some(metrics) = plan.metrics() {
363
0
                    let metrics = metrics
364
0
                        .aggregate_by_name()
365
0
                        .sorted_for_display()
366
0
                        .timestamps_removed();
367
0
368
0
                    format!("metrics=[{metrics}]")
369
                } else {
370
0
                    "metrics=[]".to_string()
371
                }
372
            }
373
            ShowMetrics::Full => {
374
0
                if let Some(metrics) = plan.metrics() {
375
0
                    format!("metrics=[{metrics}]")
376
                } else {
377
0
                    "metrics=[]".to_string()
378
                }
379
            }
380
        };
381
382
0
        let statistics = if self.show_statistics {
383
0
            let stats = plan.statistics().map_err(|_e| fmt::Error)?;
384
0
            format!("statistics=[{}]", stats)
385
        } else {
386
0
            "".to_string()
387
        };
388
389
0
        let delimiter = if !metrics.is_empty() && !statistics.is_empty() {
390
0
            ", "
391
        } else {
392
0
            ""
393
        };
394
395
0
        self.graphviz_builder.add_node(
396
0
            self.f,
397
0
            id,
398
0
            &label,
399
0
            Some(&format!("{}{}{}", metrics, delimiter, statistics)),
400
0
        )?;
401
402
0
        if let Some(parent_node_id) = self.parents.last() {
403
0
            self.graphviz_builder
404
0
                .add_edge(self.f, *parent_node_id, id)?;
405
0
        }
406
407
0
        self.parents.push(id);
408
0
409
0
        Ok(true)
410
0
    }
411
412
0
    fn post_visit(&mut self, _plan: &dyn ExecutionPlan) -> Result<bool, Self::Error> {
413
0
        self.parents.pop();
414
0
        Ok(true)
415
0
    }
416
}
417
418
/// Trait for types which could have additional details when formatted in `Verbose` mode
419
pub trait DisplayAs {
420
    /// Format according to `DisplayFormatType`, used when verbose representation looks
421
    /// different from the default one
422
    ///
423
    /// Should not include a newline
424
    fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result;
425
}
426
427
/// A newtype wrapper to display `T` implementing `DisplayAs` using the `Default` mode
428
pub struct DefaultDisplay<T>(pub T);
429
430
impl<T: DisplayAs> fmt::Display for DefaultDisplay<T> {
431
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
432
0
        self.0.fmt_as(DisplayFormatType::Default, f)
433
0
    }
434
}
435
436
/// A newtype wrapper to display `T` implementing `DisplayAs` using the `Verbose` mode
437
pub struct VerboseDisplay<T>(pub T);
438
439
impl<T: DisplayAs> fmt::Display for VerboseDisplay<T> {
440
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
441
0
        self.0.fmt_as(DisplayFormatType::Verbose, f)
442
0
    }
443
}
444
445
/// A wrapper to customize partitioned file display
446
#[derive(Debug)]
447
pub struct ProjectSchemaDisplay<'a>(pub &'a SchemaRef);
448
449
impl<'a> fmt::Display for ProjectSchemaDisplay<'a> {
450
1
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
451
1
        let parts: Vec<_> = self
452
1
            .0
453
1
            .fields()
454
1
            .iter()
455
2
            .map(|x| x.name().to_owned())
456
1
            .collect::<Vec<String>>();
457
1
        write!(f, "[{}]", parts.join(", "))
458
1
    }
459
}
460
461
/// A wrapper to customize output ordering display.
462
#[derive(Debug)]
463
pub struct OutputOrderingDisplay<'a>(pub &'a [PhysicalSortExpr]);
464
465
impl<'a> fmt::Display for OutputOrderingDisplay<'a> {
466
1
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
467
1
        write!(f, "[")
?0
;
468
1
        for (i, e) in self.0.iter().enumerate() {
469
1
            if i > 0 {
470
0
                write!(f, ", ")?
471
1
            }
472
1
            write!(f, "{e}")
?0
;
473
        }
474
1
        write!(f, "]")
475
1
    }
476
}
477
478
1
pub fn display_orderings(f: &mut Formatter, orderings: &[LexOrdering]) -> fmt::Result {
479
1
    if let Some(ordering) = orderings.first() {
480
1
        if !ordering.is_empty() {
481
1
            let start = if orderings.len() == 1 {
482
1
                ", output_ordering="
483
            } else {
484
0
                ", output_orderings=["
485
            };
486
1
            write!(f, "{}", start)
?0
;
487
1
            for (idx, ordering) in
488
1
                orderings.iter().enumerate().filter(|(_, o)| !o.is_empty())
489
            {
490
1
                match idx {
491
1
                    0 => write!(f, "{}", OutputOrderingDisplay(ordering))
?0
,
492
0
                    _ => write!(f, ", {}", OutputOrderingDisplay(ordering))?,
493
                }
494
            }
495
1
            let end = if orderings.len() == 1 { "" } else { 
"]"0
};
496
1
            write!(f, "{}", end)
?0
;
497
0
        }
498
0
    }
499
500
1
    Ok(())
501
1
}
502
503
#[cfg(test)]
504
mod tests {
505
    use std::fmt::Write;
506
    use std::sync::Arc;
507
508
    use datafusion_common::{DataFusionError, Result, Statistics};
509
    use datafusion_execution::{SendableRecordBatchStream, TaskContext};
510
511
    use crate::{DisplayAs, ExecutionPlan, PlanProperties};
512
513
    use super::DisplayableExecutionPlan;
514
515
    #[derive(Debug, Clone, Copy)]
516
    enum TestStatsExecPlan {
517
        Panic,
518
        Error,
519
        Ok,
520
    }
521
522
    impl DisplayAs for TestStatsExecPlan {
523
6
        fn fmt_as(
524
6
            &self,
525
6
            _t: crate::DisplayFormatType,
526
6
            f: &mut std::fmt::Formatter,
527
6
        ) -> std::fmt::Result {
528
6
            write!(f, "TestStatsExecPlan")
529
6
        }
530
    }
531
532
    impl ExecutionPlan for TestStatsExecPlan {
533
0
        fn name(&self) -> &'static str {
534
0
            "TestStatsExecPlan"
535
0
        }
536
537
0
        fn as_any(&self) -> &dyn std::any::Any {
538
0
            self
539
0
        }
540
541
0
        fn properties(&self) -> &PlanProperties {
542
0
            unimplemented!()
543
        }
544
545
0
        fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
546
0
            vec![]
547
0
        }
548
549
0
        fn with_new_children(
550
0
            self: Arc<Self>,
551
0
            _: Vec<Arc<dyn ExecutionPlan>>,
552
0
        ) -> Result<Arc<dyn ExecutionPlan>> {
553
0
            unimplemented!()
554
        }
555
556
0
        fn execute(
557
0
            &self,
558
0
            _: usize,
559
0
            _: Arc<TaskContext>,
560
0
        ) -> Result<SendableRecordBatchStream> {
561
0
            todo!()
562
        }
563
564
2
        fn statistics(&self) -> Result<Statistics> {
565
2
            match self {
566
1
                Self::Panic => panic!("expected panic"),
567
                Self::Error => {
568
1
                    Err(DataFusionError::Internal("expected error".to_string()))
569
                }
570
0
                Self::Ok => Ok(Statistics::new_unknown(self.schema().as_ref())),
571
            }
572
1
        }
573
    }
574
575
6
    fn test_stats_display(exec: TestStatsExecPlan, show_stats: bool) {
576
6
        let display =
577
6
            DisplayableExecutionPlan::new(&exec).set_show_statistics(show_stats);
578
6
579
6
        let mut buf = String::new();
580
6
        write!(&mut buf, "{}", display.one_line()).unwrap();
581
6
        let buf = buf.trim();
582
6
        assert_eq!(buf, "TestStatsExecPlan");
583
4
    }
584
585
    #[test]
586
1
    fn test_display_when_stats_panic_with_no_show_stats() {
587
1
        test_stats_display(TestStatsExecPlan::Panic, false);
588
1
    }
589
590
    #[test]
591
1
    fn test_display_when_stats_error_with_no_show_stats() {
592
1
        test_stats_display(TestStatsExecPlan::Error, false);
593
1
    }
594
595
    #[test]
596
1
    fn test_display_when_stats_ok_with_no_show_stats() {
597
1
        test_stats_display(TestStatsExecPlan::Ok, false);
598
1
    }
599
600
    #[test]
601
    #[should_panic(expected = "expected panic")]
602
1
    fn test_display_when_stats_panic_with_show_stats() {
603
1
        test_stats_display(TestStatsExecPlan::Panic, true);
604
1
    }
605
606
    #[test]
607
    #[should_panic(expected = "Error")] // fmt::Error
608
1
    fn test_display_when_stats_error_with_show_stats() {
609
1
        test_stats_display(TestStatsExecPlan::Error, true);
610
1
    }
611
612
    #[test]
613
1
    fn test_display_when_stats_ok_with_show_stats() {
614
1
        test_stats_display(TestStatsExecPlan::Ok, false);
615
1
    }
616
}