// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;

use arrow::datatypes::SchemaRef;
use arrow::record_batch::RecordBatch;
use futures::stream::{StreamExt, TryStreamExt};
use tokio::task::JoinSet;

use datafusion_common::config::ConfigOptions;
pub use datafusion_common::hash_utils;
pub use datafusion_common::utils::project_schema;
use datafusion_common::{exec_err, Result};
pub use datafusion_common::{internal_err, ColumnStatistics, Statistics};
use datafusion_execution::TaskContext;
pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream};
pub use datafusion_expr::{Accumulator, ColumnarValue};
pub use datafusion_physical_expr::window::WindowExpr;
pub use datafusion_physical_expr::{
    expressions, udf, Distribution, Partitioning, PhysicalExpr,
};
use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalSortExpr};
use datafusion_physical_expr_common::sort_expr::LexRequirement;

use crate::coalesce_partitions::CoalescePartitionsExec;
use crate::display::DisplayableExecutionPlan;
pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay};
pub use crate::metrics::Metric;
use crate::metrics::MetricsSet;
pub use crate::ordering::InputOrderMode;
use crate::repartition::RepartitionExec;
use crate::sorts::sort_preserving_merge::SortPreservingMergeExec;
pub use crate::stream::EmptyRecordBatchStream;
use crate::stream::RecordBatchStreamAdapter;
/// Represents a node in the DataFusion Physical Plan.
///
/// Calling [`execute`] produces an `async` [`SendableRecordBatchStream`] of
/// [`RecordBatch`] that incrementally computes a partition of the
/// `ExecutionPlan`'s output from its input. See [`Partitioning`] for more
/// details on partitioning.
///
/// Methods such as [`Self::schema`] and [`Self::properties`] communicate
/// properties of the output to the DataFusion optimizer, and methods such as
/// [`required_input_distribution`] and [`required_input_ordering`] express
/// requirements of the `ExecutionPlan` from its input.
///
/// [`ExecutionPlan`] can be displayed in a simplified form using the
/// return value from [`displayable`] in addition to the (normally
/// quite verbose) `Debug` output.
///
/// [`execute`]: ExecutionPlan::execute
/// [`required_input_distribution`]: ExecutionPlan::required_input_distribution
/// [`required_input_ordering`]: ExecutionPlan::required_input_ordering
pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
    /// Short name for the ExecutionPlan, such as 'ParquetExec'.
    ///
    /// Implementation note: this method can just proxy to
    /// [`static_name`](ExecutionPlan::static_name) if no special action is
    /// needed. It doesn't provide such a default implementation because doing
    /// so would require the `Sized` constraint on this method, which would
    /// rule out trait objects and narrow the range of use cases.
    fn name(&self) -> &str;

    /// Short name for the ExecutionPlan, such as 'ParquetExec'.
    /// Like [`name`](ExecutionPlan::name) but can be called without an instance.
    fn static_name() -> &'static str
    where
        Self: Sized,
    {
        let full_name = std::any::type_name::<Self>();
        let maybe_start_idx = full_name.rfind(':');
        match maybe_start_idx {
            Some(start_idx) => &full_name[start_idx + 1..],
            None => "UNKNOWN",
        }
    }

    /// Returns the execution plan as [`Any`] so that it can be
    /// downcast to a specific implementation.
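    ///
    /// For example, optimizer rules use this to check whether a plan is a
    /// specific operator. A minimal sketch (not from the original docs;
    /// `CoalescePartitionsExec` is just an arbitrary concrete operator):
    ///
    /// ```
    /// # use datafusion_physical_plan::ExecutionPlan;
    /// # use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec;
    /// fn is_coalesce_partitions(plan: &dyn ExecutionPlan) -> bool {
    ///     plan.as_any().downcast_ref::<CoalescePartitionsExec>().is_some()
    /// }
    /// ```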
    fn as_any(&self) -> &dyn Any;

    /// Get the schema for this execution plan
    fn schema(&self) -> SchemaRef {
        Arc::clone(self.properties().schema())
    }

    /// Return properties of the output of the `ExecutionPlan`, such as output
    /// ordering(s), partitioning information etc.
    ///
    /// This information is available via methods on the
    /// [`ExecutionPlanProperties`] trait, which is implemented for all
    /// `ExecutionPlan`s.
    fn properties(&self) -> &PlanProperties;

    /// Specifies the data distribution requirements for all the children of
    /// this `ExecutionPlan`. By default it is
    /// [`Distribution::UnspecifiedDistribution`] for each child.
    fn required_input_distribution(&self) -> Vec<Distribution> {
        vec![Distribution::UnspecifiedDistribution; self.children().len()]
    }

    /// Specifies the ordering required for all of the children of this
    /// `ExecutionPlan`.
    ///
    /// For each child, it's the local ordering requirement within
    /// each partition rather than the global ordering.
    ///
    /// NOTE that checking `!is_empty()` does **not** check for a
    /// required input ordering. Instead, the correct check is that at
    /// least one entry must be `Some`, as in the sketch below.
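    ///
    /// A minimal illustration of that check (not a DataFusion helper):
    ///
    /// ```
    /// # use datafusion_physical_plan::ExecutionPlan;
    /// fn requires_input_ordering(plan: &dyn ExecutionPlan) -> bool {
    ///     plan.required_input_ordering().iter().any(|r| r.is_some())
    /// }
    /// ```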
    fn required_input_ordering(&self) -> Vec<Option<LexRequirement>> {
        vec![None; self.children().len()]
    }

    /// Returns `false` if this `ExecutionPlan`'s implementation may reorder
    /// rows within or between partitions.
    ///
    /// For example, Projection, Filter, and Limit maintain the order
    /// of inputs -- they may transform values (Projection) or not
    /// produce the same number of rows that went in (Filter and
    /// Limit), but the rows that are produced come out in the same order.
    ///
    /// DataFusion uses this metadata to apply certain optimizations
    /// such as automatically repartitioning correctly.
    ///
    /// The default implementation returns `false`.
    ///
    /// WARNING: if you override this default, you *MUST* ensure that
    /// the `ExecutionPlan` maintains the ordering invariant or else
    /// DataFusion may produce incorrect results.
    fn maintains_input_order(&self) -> Vec<bool> {
        vec![false; self.children().len()]
    }

    /// Specifies whether the `ExecutionPlan` benefits from increased
    /// parallelization at its input for each child.
    ///
    /// If this returns `true`, the `ExecutionPlan` would benefit from
    /// partitioning its corresponding child (and thus from more parallelism).
    /// For `ExecutionPlan`s that do very little work the overhead of extra
    /// parallelism may outweigh any benefits.
    ///
    /// The default implementation returns `true` unless this `ExecutionPlan`
    /// has signalled it requires a single child input partition.
    fn benefits_from_input_partitioning(&self) -> Vec<bool> {
        // By default try to maximize parallelism with more CPUs if
        // possible
        self.required_input_distribution()
            .into_iter()
            .map(|dist| !matches!(dist, Distribution::SinglePartition))
            .collect()
    }

    /// Get a list of children `ExecutionPlan`s that act as inputs to this plan.
    /// The returned list will be empty for leaf nodes such as scans, will contain
    /// a single value for unary nodes, or two values for binary nodes (such as
    /// joins).
    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>>;

    /// Returns a new `ExecutionPlan` where all existing children were replaced
    /// by the `children`, in order.
    fn with_new_children(
        self: Arc<Self>,
        children: Vec<Arc<dyn ExecutionPlan>>,
    ) -> Result<Arc<dyn ExecutionPlan>>;

    /// If supported, attempt to increase the partitioning of this `ExecutionPlan` to
    /// produce `target_partitions` partitions.
    ///
    /// If the `ExecutionPlan` does not support changing its partitioning,
    /// returns `Ok(None)` (the default).
    ///
    /// If the `ExecutionPlan` can increase its partitioning, but not to the
    /// full `target_partitions`, it may return an `ExecutionPlan` with fewer
    /// partitions. This might happen, for example, if each new partition would
    /// be too small to be efficiently processed individually.
    ///
    /// The DataFusion optimizer attempts to use as many threads as possible by
    /// repartitioning its inputs to match the target number of threads
    /// available (`target_partitions`). Some data sources, such as the built-in
    /// CSV and Parquet readers, implement this method as they are able to read
    /// from their input files in parallel, regardless of how the source data is
    /// split amongst files.
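    ///
    /// For example, an optimizer pass might ask a plan to scale out and fall
    /// back to the original plan if it cannot. A hedged sketch (`scale_out`
    /// is hypothetical, not a DataFusion API):
    ///
    /// ```
    /// # use std::sync::Arc;
    /// # use datafusion_common::config::ConfigOptions;
    /// # use datafusion_physical_plan::ExecutionPlan;
    /// fn scale_out(plan: Arc<dyn ExecutionPlan>) -> Arc<dyn ExecutionPlan> {
    ///     let config = ConfigOptions::default();
    ///     match plan.repartitioned(8, &config) {
    ///         Ok(Some(repartitioned)) => repartitioned,
    ///         // `Ok(None)` (the default) means this plan cannot repartition itself
    ///         _ => plan,
    ///     }
    /// }
    /// ```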
    fn repartitioned(
        &self,
        _target_partitions: usize,
        _config: &ConfigOptions,
    ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
        Ok(None)
    }

    /// Begin execution of `partition`, returning a [`Stream`] of
    /// [`RecordBatch`]es.
    ///
    /// # Notes
    ///
    /// The `execute` method itself is not `async` but it returns an `async`
    /// [`futures::stream::Stream`]. This `Stream` should incrementally compute
    /// the output, `RecordBatch` by `RecordBatch` (in a streaming fashion).
    /// Most `ExecutionPlan`s should not do any work before the first
    /// `RecordBatch` is requested from the stream.
    ///
    /// [`RecordBatchStreamAdapter`] can be used to convert an `async`
    /// [`Stream`] into a [`SendableRecordBatchStream`].
    ///
    /// Using `async` `Streams` allows for network I/O during execution and
    /// takes advantage of Rust's built-in support for `async` continuations
    /// and its crate ecosystem.
    ///
    /// [`Stream`]: futures::stream::Stream
    /// [`StreamExt`]: futures::stream::StreamExt
    /// [`TryStreamExt`]: futures::stream::TryStreamExt
    /// [`RecordBatchStreamAdapter`]: crate::stream::RecordBatchStreamAdapter
    ///
    /// # Error handling
    ///
    /// Any error that occurs during execution is sent as an `Err` in the output
    /// stream.
    ///
    /// `ExecutionPlan` implementations in DataFusion cancel additional work
    /// immediately once an error occurs. The rationale is that if the overall
    /// query will return an error, any additional work such as continued
    /// polling of inputs will be wasted as it will be thrown away.
    ///
    /// # Cancellation / Aborting Execution
    ///
    /// The [`Stream`] that is returned must ensure that any allocated resources
    /// are freed when the stream itself is dropped. This is particularly
    /// important for [`spawn`]ed tasks or threads. Unless care is taken to
    /// "abort" such tasks, they may continue to consume resources even after
    /// the plan is dropped, generating intermediate results that are never
    /// used. For this reason, [`spawn`] is disallowed; use [`SpawnedTask`]
    /// instead.
    ///
    /// See [`SpawnedTask`], [`JoinSet`] and [`RecordBatchReceiverStreamBuilder`]
    /// for structures that help ensure all background tasks are cancelled.
    ///
    /// [`spawn`]: tokio::task::spawn
    /// [`JoinSet`]: tokio::task::JoinSet
    /// [`SpawnedTask`]: datafusion_common_runtime::SpawnedTask
    /// [`RecordBatchReceiverStreamBuilder`]: crate::stream::RecordBatchReceiverStreamBuilder
    ///
    /// # Implementation Examples
    ///
    /// While `async` `Stream`s have a non-trivial learning curve, the
    /// [`futures`] crate provides [`StreamExt`] and [`TryStreamExt`]
    /// which help simplify many common operations.
    ///
    /// Here are some common patterns:
    ///
    /// ## Return Precomputed `RecordBatch`
    ///
    /// We can return a precomputed `RecordBatch` as a `Stream`:
    ///
    /// ```
    /// # use std::sync::Arc;
    /// # use arrow_array::RecordBatch;
    /// # use arrow_schema::SchemaRef;
    /// # use datafusion_common::Result;
    /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext};
    /// # use datafusion_physical_plan::memory::MemoryStream;
    /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
    /// struct MyPlan {
    ///     batch: RecordBatch,
    /// }
    ///
    /// impl MyPlan {
    ///     fn execute(
    ///         &self,
    ///         partition: usize,
    ///         context: Arc<TaskContext>
    ///     ) -> Result<SendableRecordBatchStream> {
    ///         // use functions from the futures crate to convert the batch into a stream
    ///         let fut = futures::future::ready(Ok(self.batch.clone()));
    ///         let stream = futures::stream::once(fut);
    ///         Ok(Box::pin(RecordBatchStreamAdapter::new(self.batch.schema(), stream)))
    ///     }
    /// }
    /// ```
    ///
    /// ## Lazily (async) Compute `RecordBatch`
    ///
    /// We can also lazily compute a `RecordBatch` when the returned `Stream` is polled:
    ///
    /// ```
    /// # use std::sync::Arc;
    /// # use arrow_array::RecordBatch;
    /// # use arrow_schema::SchemaRef;
    /// # use datafusion_common::Result;
    /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext};
    /// # use datafusion_physical_plan::memory::MemoryStream;
    /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
    /// struct MyPlan {
    ///     schema: SchemaRef,
    /// }
    ///
    /// /// Returns a single batch when the returned stream is polled
    /// async fn get_batch() -> Result<RecordBatch> {
    ///     todo!()
    /// }
    ///
    /// impl MyPlan {
    ///     fn execute(
    ///         &self,
    ///         partition: usize,
    ///         context: Arc<TaskContext>
    ///     ) -> Result<SendableRecordBatchStream> {
    ///         let fut = get_batch();
    ///         let stream = futures::stream::once(fut);
    ///         Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream)))
    ///     }
    /// }
    /// ```
    ///
    /// ## Lazily (async) create a Stream
    ///
    /// If you need to create the returned `Stream` using an `async` function,
    /// you can do so by flattening the result:
    ///
    /// ```
    /// # use std::sync::Arc;
    /// # use arrow_array::RecordBatch;
    /// # use arrow_schema::SchemaRef;
    /// # use futures::TryStreamExt;
    /// # use datafusion_common::Result;
    /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext};
    /// # use datafusion_physical_plan::memory::MemoryStream;
    /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
    /// struct MyPlan {
    ///     schema: SchemaRef,
    /// }
    ///
    /// /// async function that returns a stream
    /// async fn get_batch_stream() -> Result<SendableRecordBatchStream> {
    ///     todo!()
    /// }
    ///
    /// impl MyPlan {
    ///     fn execute(
    ///         &self,
    ///         partition: usize,
    ///         context: Arc<TaskContext>
    ///     ) -> Result<SendableRecordBatchStream> {
    ///         // A future that yields a stream
    ///         let fut = get_batch_stream();
    ///         // Use TryStreamExt::try_flatten to flatten the stream of streams
    ///         let stream = futures::stream::once(fut).try_flatten();
    ///         Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream)))
    ///     }
    /// }
    /// ```
    fn execute(
        &self,
        partition: usize,
        context: Arc<TaskContext>,
    ) -> Result<SendableRecordBatchStream>;

    /// Return a snapshot of the set of [`Metric`]s for this
    /// [`ExecutionPlan`]. If no `Metric`s are available, return `None`.
    ///
    /// While the values of the metrics in the returned
    /// [`MetricsSet`]s may change as execution progresses, the
    /// specific metrics will not.
    ///
    /// Once `self.execute()` has returned (technically the future is
    /// resolved) for all available partitions, the set of metrics
    /// should be complete. If this function is called prior to
    /// `execute()`, new metrics may appear in subsequent calls.
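    ///
    /// A minimal sketch of consuming these metrics (`print_metrics` is
    /// illustrative only):
    ///
    /// ```
    /// # use datafusion_physical_plan::ExecutionPlan;
    /// fn print_metrics(plan: &dyn ExecutionPlan) {
    ///     if let Some(metrics) = plan.metrics() {
    ///         // `aggregate_by_name` merges metrics with the same name across
    ///         // all partitions
    ///         println!("{}", metrics.aggregate_by_name());
    ///     }
    /// }
    /// ```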
    fn metrics(&self) -> Option<MetricsSet> {
        None
    }

    /// Returns statistics for this `ExecutionPlan` node. If statistics are not
    /// available, should return [`Statistics::new_unknown`] (the default), not
    /// an error.
    ///
    /// For TableScan executors, which support filter pushdown, special
    /// attention needs to be paid to whether the statistics returned by this
    /// method are exact or not.
    fn statistics(&self) -> Result<Statistics> {
        Ok(Statistics::new_unknown(&self.schema()))
    }

    /// Returns `true` if a limit can be safely pushed down through this
    /// `ExecutionPlan` node.
    ///
    /// If this method returns `true`, and the query plan contains a limit at
    /// the output of this node, DataFusion will push the limit to the input
    /// of this node.
    fn supports_limit_pushdown(&self) -> bool {
        false
    }

    /// Returns a fetching variant of this `ExecutionPlan` node, if it supports
    /// fetch limits. Returns `None` otherwise.
    fn with_fetch(&self, _limit: Option<usize>) -> Option<Arc<dyn ExecutionPlan>> {
        None
    }

    /// Gets the fetch count for the operator; `None` means there is no fetch.
    fn fetch(&self) -> Option<usize> {
        None
    }
}

/// Extension trait that provides an easy API to fetch various properties of
/// [`ExecutionPlan`] objects based on [`ExecutionPlan::properties`].
pub trait ExecutionPlanProperties {
    /// Specifies how the output of this `ExecutionPlan` is split into
    /// partitions.
    fn output_partitioning(&self) -> &Partitioning;

    /// Specifies whether this plan generates an infinite stream of records.
    /// If the plan does not support pipelining, but its input(s) are
    /// infinite, returns [`ExecutionMode::PipelineBreaking`] to indicate this.
    fn execution_mode(&self) -> ExecutionMode;

    /// If the output of this `ExecutionPlan` within each partition is sorted,
    /// returns `Some(keys)` describing the ordering. A `None` return value
    /// indicates no assumptions should be made on the output ordering.
    ///
    /// For example, `SortExec` (obviously) produces sorted output as does
    /// `SortPreservingMergeStream`. Less obviously, `Projection` produces sorted
    /// output if its input is sorted as it does not reorder the input rows.
    fn output_ordering(&self) -> Option<&[PhysicalSortExpr]>;

    /// Get the [`EquivalenceProperties`] within the plan.
    ///
    /// Equivalence properties tell DataFusion what columns are known to be
    /// equal, during various optimization passes. By default, this returns "no
    /// known equivalences" which is always correct, but may cause DataFusion to
    /// unnecessarily re-sort data.
    ///
    /// If this ExecutionPlan makes no changes to the schema of the rows flowing
    /// through it or how columns within each row relate to each other, it
    /// should return the equivalence properties of its input. For
    /// example, since `FilterExec` may remove rows from its input, but does not
    /// otherwise modify them, it preserves its input equivalence properties.
    /// However, since `ProjectionExec` may calculate derived expressions, it
    /// needs special handling.
    ///
    /// See also [`ExecutionPlan::maintains_input_order`] and [`Self::output_ordering`]
    /// for related concepts.
    fn equivalence_properties(&self) -> &EquivalenceProperties;
}

impl ExecutionPlanProperties for Arc<dyn ExecutionPlan> {
    fn output_partitioning(&self) -> &Partitioning {
        self.properties().output_partitioning()
    }

    fn execution_mode(&self) -> ExecutionMode {
        self.properties().execution_mode()
    }

    fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
        self.properties().output_ordering()
    }

    fn equivalence_properties(&self) -> &EquivalenceProperties {
        self.properties().equivalence_properties()
    }
}

impl ExecutionPlanProperties for &dyn ExecutionPlan {
    fn output_partitioning(&self) -> &Partitioning {
        self.properties().output_partitioning()
    }

    fn execution_mode(&self) -> ExecutionMode {
        self.properties().execution_mode()
    }

    fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
        self.properties().output_ordering()
    }

    fn equivalence_properties(&self) -> &EquivalenceProperties {
        self.properties().equivalence_properties()
    }
}

/// Describes the execution mode of the result of calling
/// [`ExecutionPlan::execute`] with respect to its size and behavior.
///
/// The mode of the execution plan is determined by the mode of its input
/// execution plans and the details of the operator itself. For example, a
/// `FilterExec` operator will have the same execution mode as its input, but a
/// `SortExec` operator may have a different execution mode than its input,
/// depending on how the input stream is sorted.
///
/// There are three possible execution modes: `Bounded`, `Unbounded` and
/// `PipelineBreaking`.
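///
/// For example, a planner sanity check might only allow operators that fully
/// materialize their input (such as a sort) on bounded input. A minimal,
/// illustrative sketch (`can_fully_materialize` is not a DataFusion API, and
/// the imports assume the crate's top-level re-exports):
///
/// ```
/// # use datafusion_physical_plan::{ExecutionMode, ExecutionPlan, ExecutionPlanProperties};
/// fn can_fully_materialize(plan: &dyn ExecutionPlan) -> bool {
///     matches!(plan.execution_mode(), ExecutionMode::Bounded)
/// }
/// ```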
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum ExecutionMode {
    /// The stream is bounded / finite.
    ///
    /// In this case the stream will eventually return `None` to indicate that
    /// there are no more records to process.
    Bounded,
    /// The stream is unbounded / infinite.
    ///
    /// In this case, the stream will never be done (never return `None`),
    /// except in case of error.
    ///
    /// This mode is often used in "Streaming" use cases where data is
    /// incrementally processed as it arrives.
    ///
    /// Note that even though the operator generates an unbounded stream of
    /// results, it can execute with bounded memory and incrementally produce
    /// output.
    Unbounded,
    /// Some of the operator's input stream(s) are unbounded, but the operator
    /// cannot generate streaming results from these streaming inputs.
    ///
    /// In this case, the execution mode will be pipeline breaking, e.g. the
    /// operator requires unbounded memory to generate results. This
    /// information is used by the planner when performing sanity checks
    /// on plans that process unbounded data sources.
    PipelineBreaking,
}

impl ExecutionMode {
    /// Check whether the execution mode is unbounded or not.
    pub fn is_unbounded(&self) -> bool {
        matches!(self, ExecutionMode::Unbounded)
    }

    /// Check whether the execution is pipeline friendly. If so, the operator
    /// can execute safely.
    pub fn pipeline_friendly(&self) -> bool {
        matches!(self, ExecutionMode::Bounded | ExecutionMode::Unbounded)
    }
}

/// Conservatively "combines" execution modes of a given collection of operators.
pub(crate) fn execution_mode_from_children<'a>(
    children: impl IntoIterator<Item = &'a Arc<dyn ExecutionPlan>>,
) -> ExecutionMode {
    let mut result = ExecutionMode::Bounded;
    for mode in children.into_iter().map(|child| child.execution_mode()) {
        match (mode, result) {
            (ExecutionMode::PipelineBreaking, _)
            | (_, ExecutionMode::PipelineBreaking) => {
                // If any of the modes is `PipelineBreaking`, so is the result:
                return ExecutionMode::PipelineBreaking;
            }
            (ExecutionMode::Unbounded, _) | (_, ExecutionMode::Unbounded) => {
                // Unbounded mode eats up bounded mode:
                result = ExecutionMode::Unbounded;
            }
            (ExecutionMode::Bounded, ExecutionMode::Bounded) => {
                // When both modes are bounded, so is the result:
                result = ExecutionMode::Bounded;
            }
        }
    }
    result
}

/// Stores certain, often expensive to compute, plan properties used in query
/// optimization.
///
/// These properties are stored in a single structure to permit this
/// information to be computed once and then those cached results used
/// multiple times without recomputation (aka a cache).
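///
/// # Example
///
/// A minimal sketch (not from the original docs) of constructing
/// `PlanProperties` for a hypothetical single-partition, bounded operator,
/// assuming the crate's top-level re-exports:
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_schema::Schema;
/// # use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
/// # use datafusion_physical_plan::{ExecutionMode, PlanProperties};
/// let schema = Arc::new(Schema::empty());
/// let eq_properties = EquivalenceProperties::new(schema);
/// let properties = PlanProperties::new(
///     eq_properties,
///     Partitioning::UnknownPartitioning(1),
///     ExecutionMode::Bounded,
/// );
/// assert_eq!(properties.output_partitioning().partition_count(), 1);
/// ```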
#[derive(Debug, Clone)]
pub struct PlanProperties {
    /// See [ExecutionPlanProperties::equivalence_properties]
    pub eq_properties: EquivalenceProperties,
    /// See [ExecutionPlanProperties::output_partitioning]
    pub partitioning: Partitioning,
    /// See [ExecutionPlanProperties::execution_mode]
    pub execution_mode: ExecutionMode,
    /// See [ExecutionPlanProperties::output_ordering]
    output_ordering: Option<LexOrdering>,
}

impl PlanProperties {
    /// Construct a new `PlanProperties` from its constituent parts.
    pub fn new(
        eq_properties: EquivalenceProperties,
        partitioning: Partitioning,
        execution_mode: ExecutionMode,
    ) -> Self {
        // Output ordering can be derived from `eq_properties`.
        let output_ordering = eq_properties.output_ordering();
        Self {
            eq_properties,
            partitioning,
            execution_mode,
            output_ordering,
        }
    }

    /// Overwrite output partitioning with its new value.
    pub fn with_partitioning(mut self, partitioning: Partitioning) -> Self {
        self.partitioning = partitioning;
        self
    }

    /// Overwrite the execution mode with its new value.
    pub fn with_execution_mode(mut self, execution_mode: ExecutionMode) -> Self {
        self.execution_mode = execution_mode;
        self
    }

    /// Overwrite equivalence properties with its new value.
    pub fn with_eq_properties(mut self, eq_properties: EquivalenceProperties) -> Self {
        // Changing equivalence properties also changes output ordering, so
        // make sure to overwrite it:
        self.output_ordering = eq_properties.output_ordering();
        self.eq_properties = eq_properties;
        self
    }

    pub fn equivalence_properties(&self) -> &EquivalenceProperties {
        &self.eq_properties
    }

    pub fn output_partitioning(&self) -> &Partitioning {
        &self.partitioning
    }

    pub fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
        self.output_ordering.as_deref()
    }

    pub fn execution_mode(&self) -> ExecutionMode {
        self.execution_mode
    }

    /// Get the schema of the node.
    fn schema(&self) -> &SchemaRef {
        self.eq_properties.schema()
    }
}

/// Indicates whether a data exchange is needed for the input of `plan`. This
/// is particularly helpful for a distributed engine when judging whether it
/// needs to deal with shuffling.
///
/// Currently there are three kinds of execution plans that need data exchange:
///
/// 1. `RepartitionExec` for changing the partition number between two `ExecutionPlan`s
/// 2. `CoalescePartitionsExec` for collapsing all of the partitions into one without ordering guarantee
/// 3. `SortPreservingMergeExec` for collapsing all of the sorted partitions into one with ordering guarantee
pub fn need_data_exchange(plan: Arc<dyn ExecutionPlan>) -> bool {
    if let Some(repartition) = plan.as_any().downcast_ref::<RepartitionExec>() {
        !matches!(
            repartition.properties().output_partitioning(),
            Partitioning::RoundRobinBatch(_)
        )
    } else if let Some(coalesce) = plan.as_any().downcast_ref::<CoalescePartitionsExec>()
    {
        coalesce.input().output_partitioning().partition_count() > 1
    } else if let Some(sort_preserving_merge) =
        plan.as_any().downcast_ref::<SortPreservingMergeExec>()
    {
        sort_preserving_merge
            .input()
            .output_partitioning()
            .partition_count()
            > 1
    } else {
        false
    }
}

/// Returns a copy of this plan with its children replaced if any child
/// changed, as determined by pointer comparison.
/// The size of `children` must be equal to the size of `ExecutionPlan::children()`.
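///
/// A minimal sketch of the typical rewrite pattern (illustrative only, and
/// assuming the crate's top-level re-exports):
///
/// ```
/// # use std::sync::Arc;
/// # use datafusion_common::Result;
/// # use datafusion_physical_plan::{with_new_children_if_necessary, ExecutionPlan};
/// // Re-attach (possibly rewritten) children to `plan`, copying the node
/// // only when at least one child pointer actually changed.
/// fn reattach(plan: Arc<dyn ExecutionPlan>) -> Result<Arc<dyn ExecutionPlan>> {
///     let children: Vec<_> = plan.children().into_iter().cloned().collect();
///     // ... a rewrite pass would transform `children` here ...
///     with_new_children_if_necessary(plan, children)
/// }
/// ```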
pub fn with_new_children_if_necessary(
    plan: Arc<dyn ExecutionPlan>,
    children: Vec<Arc<dyn ExecutionPlan>>,
) -> Result<Arc<dyn ExecutionPlan>> {
    let old_children = plan.children();
    if children.len() != old_children.len() {
        internal_err!("Wrong number of children")
    } else if children.is_empty()
        || children
            .iter()
            .zip(old_children.iter())
            .any(|(c1, c2)| !Arc::ptr_eq(c1, c2))
    {
        plan.with_new_children(children)
    } else {
        Ok(plan)
    }
}

/// Return a [wrapper](DisplayableExecutionPlan) around an
/// [`ExecutionPlan`] which can be displayed in various easier-to-understand
/// ways.
pub fn displayable(plan: &dyn ExecutionPlan) -> DisplayableExecutionPlan<'_> {
    DisplayableExecutionPlan::new(plan)
}

/// Execute the [ExecutionPlan] and collect the results in memory.
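///
/// A minimal usage sketch (`my_plan` is a hypothetical plan constructor, and
/// the imports assume the crate's top-level re-exports):
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::RecordBatch;
/// # use datafusion_common::Result;
/// # use datafusion_execution::TaskContext;
/// # use datafusion_physical_plan::{collect, ExecutionPlan};
/// # fn my_plan() -> Arc<dyn ExecutionPlan> { todo!() }
/// // Hypothetical driver: buffer every partition of the plan's output in memory.
/// async fn total_batches() -> Result<usize> {
///     let context = Arc::new(TaskContext::default());
///     let batches: Vec<RecordBatch> = collect(my_plan(), context).await?;
///     Ok(batches.len())
/// }
/// ```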
pub async fn collect(
    plan: Arc<dyn ExecutionPlan>,
    context: Arc<TaskContext>,
) -> Result<Vec<RecordBatch>> {
    let stream = execute_stream(plan, context)?;
    crate::common::collect(stream).await
}

/// Execute the [ExecutionPlan] and return a single stream of `RecordBatch`es.
///
/// See [collect] to buffer the `RecordBatch`es in memory.
///
/// # Aborting Execution
///
/// Dropping the stream will abort the execution of the query, and free up
/// any allocated resources
pub fn execute_stream(
    plan: Arc<dyn ExecutionPlan>,
    context: Arc<TaskContext>,
) -> Result<SendableRecordBatchStream> {
    match plan.output_partitioning().partition_count() {
        0 => Ok(Box::pin(EmptyRecordBatchStream::new(plan.schema()))),
        1 => plan.execute(0, context),
        2.. => {
            // merge into a single partition
            let plan = CoalescePartitionsExec::new(Arc::clone(&plan));
            // CoalescePartitionsExec must produce a single partition
            assert_eq!(1, plan.properties().output_partitioning().partition_count());
            plan.execute(0, context)
        }
    }
}

/// Execute the [ExecutionPlan] and collect the results in memory, returning
/// one `Vec<RecordBatch>` per partition.
pub async fn collect_partitioned(
    plan: Arc<dyn ExecutionPlan>,
    context: Arc<TaskContext>,
) -> Result<Vec<Vec<RecordBatch>>> {
    let streams = execute_stream_partitioned(plan, context)?;

    let mut join_set = JoinSet::new();
    // Execute the plan and collect the results into batches.
    streams.into_iter().enumerate().for_each(|(idx, stream)| {
        join_set.spawn(async move {
            let result: Result<Vec<RecordBatch>> = stream.try_collect().await;
            (idx, result)
        });
    });

    let mut batches = vec![];
    // Note that currently this doesn't identify the thread that panicked
    //
    // TODO: Replace with [join_next_with_id](https://docs.rs/tokio/latest/tokio/task/struct.JoinSet.html#method.join_next_with_id)
    // once it is stable
    while let Some(result) = join_set.join_next().await {
        match result {
            Ok((idx, res)) => batches.push((idx, res?)),
            Err(e) => {
                if e.is_panic() {
                    std::panic::resume_unwind(e.into_panic());
                } else {
                    unreachable!();
                }
            }
        }
    }

    batches.sort_by_key(|(idx, _)| *idx);
    let batches = batches.into_iter().map(|(_, batch)| batch).collect();

    Ok(batches)
}

/// Execute the [ExecutionPlan] and return a vec with one stream per output
/// partition
///
/// # Aborting Execution
///
/// Dropping the stream will abort the execution of the query, and free up
/// any allocated resources
pub fn execute_stream_partitioned(
    plan: Arc<dyn ExecutionPlan>,
    context: Arc<TaskContext>,
) -> Result<Vec<SendableRecordBatchStream>> {
    let num_partitions = plan.output_partitioning().partition_count();
    let mut streams = Vec::with_capacity(num_partitions);
    for i in 0..num_partitions {
        streams.push(plan.execute(i, Arc::clone(&context))?);
    }
    Ok(streams)
}

/// Executes an input stream and ensures that the resulting stream adheres to
/// the `not null` constraints specified in the `sink_schema`.
///
/// # Arguments
///
/// * `input` - An execution plan
/// * `sink_schema` - The schema to be applied to the output stream
/// * `partition` - The partition index to be executed
/// * `context` - The task context
///
/// # Returns
///
/// * `Result<SendableRecordBatchStream>` - A stream of `RecordBatch`es if successful
///
/// This function first executes the given input plan for the specified partition
/// and context. It then checks if there are any columns in the input that might
/// violate the `not null` constraints specified in the `sink_schema`. If there are
/// such columns, it wraps the resulting stream to enforce the `not null` constraints
/// by invoking the `check_not_null_contraits` function on each batch of the stream.
pub fn execute_input_stream(
    input: Arc<dyn ExecutionPlan>,
    sink_schema: SchemaRef,
    partition: usize,
    context: Arc<TaskContext>,
) -> Result<SendableRecordBatchStream> {
    let input_stream = input.execute(partition, context)?;

    debug_assert_eq!(sink_schema.fields().len(), input.schema().fields().len());

    // Find input columns that may violate the not null constraint.
    let risky_columns: Vec<_> = sink_schema
        .fields()
        .iter()
        .zip(input.schema().fields().iter())
        .enumerate()
        .filter_map(|(idx, (sink_field, input_field))| {
            (!sink_field.is_nullable() && input_field.is_nullable()).then_some(idx)
        })
        .collect();

    if risky_columns.is_empty() {
        Ok(input_stream)
    } else {
        // Check not null constraint on the input stream
        Ok(Box::pin(RecordBatchStreamAdapter::new(
            sink_schema,
            input_stream
                .map(move |batch| check_not_null_contraits(batch?, &risky_columns)),
        )))
    }
}

/// Checks a `RecordBatch` for `not null` constraints on specified columns.
///
/// # Arguments
///
/// * `batch` - The `RecordBatch` to be checked
/// * `column_indices` - A vector of column indices that should be checked for
///   `not null` constraints.
///
/// # Returns
///
/// * `Result<RecordBatch>` - The original `RecordBatch` if all constraints are met
///
/// This function iterates over the specified column indices and ensures that none
/// of the columns contain null values. If any column contains null values, an error
/// is returned.
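///
/// # Example
///
/// A small sketch of the failure case (illustrative only; the import assumes
/// the public `execution_plan` module path):
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{Int32Array, RecordBatch};
/// # use arrow_schema::{DataType, Field, Schema};
/// # use datafusion_physical_plan::execution_plan::check_not_null_contraits;
/// let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
/// let batch = RecordBatch::try_new(
///     schema,
///     vec![Arc::new(Int32Array::from(vec![Some(1), None]))],
/// )
/// .unwrap();
/// // Column 0 contains a null, so checking it violates `not null`
/// assert!(check_not_null_contraits(batch, &vec![0]).is_err());
/// ```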
pub fn check_not_null_contraits(
    batch: RecordBatch,
    column_indices: &Vec<usize>,
) -> Result<RecordBatch> {
    for &index in column_indices {
        if batch.num_columns() <= index {
            return exec_err!(
                "Invalid batch column count {} expected > {}",
                batch.num_columns(),
                index
            );
        }

        if batch.column(index).null_count() > 0 {
            return exec_err!(
                "Invalid batch column at '{}' has null but schema specifies non-nullable",
                index
            );
        }
    }

    Ok(batch)
}

/// Utility function yielding a string representation of the given [`ExecutionPlan`].
pub fn get_plan_string(plan: &Arc<dyn ExecutionPlan>) -> Vec<String> {
    let formatted = displayable(plan.as_ref()).indent(true).to_string();
    let actual: Vec<&str> = formatted.trim().lines().collect();
    actual.iter().map(|elem| elem.to_string()).collect()
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::any::Any;
    use std::sync::Arc;

    use arrow_schema::{Schema, SchemaRef};

    use datafusion_common::{Result, Statistics};
    use datafusion_execution::{SendableRecordBatchStream, TaskContext};

    use crate::{DisplayAs, DisplayFormatType, ExecutionPlan};

    #[derive(Debug)]
    pub struct EmptyExec;

    impl EmptyExec {
        pub fn new(_schema: SchemaRef) -> Self {
            Self
        }
    }

    impl DisplayAs for EmptyExec {
        fn fmt_as(
            &self,
            _t: DisplayFormatType,
            _f: &mut std::fmt::Formatter,
        ) -> std::fmt::Result {
            unimplemented!()
        }
    }

    impl ExecutionPlan for EmptyExec {
        fn name(&self) -> &'static str {
            Self::static_name()
        }

        fn as_any(&self) -> &dyn Any {
            self
        }

        fn properties(&self) -> &PlanProperties {
            unimplemented!()
        }

        fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
            vec![]
        }

        fn with_new_children(
            self: Arc<Self>,
            _: Vec<Arc<dyn ExecutionPlan>>,
        ) -> Result<Arc<dyn ExecutionPlan>> {
            unimplemented!()
        }

        fn execute(
            &self,
            _partition: usize,
            _context: Arc<TaskContext>,
        ) -> Result<SendableRecordBatchStream> {
            unimplemented!()
        }

        fn statistics(&self) -> Result<Statistics> {
            unimplemented!()
        }
    }

    #[derive(Debug)]
    pub struct RenamedEmptyExec;

    impl RenamedEmptyExec {
        pub fn new(_schema: SchemaRef) -> Self {
            Self
        }
    }

    impl DisplayAs for RenamedEmptyExec {
        fn fmt_as(
            &self,
            _t: DisplayFormatType,
            _f: &mut std::fmt::Formatter,
        ) -> std::fmt::Result {
            unimplemented!()
        }
    }

    impl ExecutionPlan for RenamedEmptyExec {
        fn name(&self) -> &'static str {
            Self::static_name()
        }

        fn static_name() -> &'static str
        where
            Self: Sized,
        {
            "MyRenamedEmptyExec"
        }

        fn as_any(&self) -> &dyn Any {
            self
        }

        fn properties(&self) -> &PlanProperties {
            unimplemented!()
        }

        fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
            vec![]
        }

        fn with_new_children(
            self: Arc<Self>,
            _: Vec<Arc<dyn ExecutionPlan>>,
        ) -> Result<Arc<dyn ExecutionPlan>> {
            unimplemented!()
        }

        fn execute(
            &self,
            _partition: usize,
            _context: Arc<TaskContext>,
        ) -> Result<SendableRecordBatchStream> {
            unimplemented!()
        }

        fn statistics(&self) -> Result<Statistics> {
            unimplemented!()
        }
    }

    #[test]
    fn test_execution_plan_name() {
        let schema1 = Arc::new(Schema::empty());
        let default_name_exec = EmptyExec::new(schema1);
        assert_eq!(default_name_exec.name(), "EmptyExec");

        let schema2 = Arc::new(Schema::empty());
        let renamed_exec = RenamedEmptyExec::new(schema2);
        assert_eq!(renamed_exec.name(), "MyRenamedEmptyExec");
        assert_eq!(RenamedEmptyExec::static_name(), "MyRenamedEmptyExec");
    }

    /// A compilation test to ensure that the `ExecutionPlan::name()` method can
    /// be called from a trait object.
    /// Related ticket: https://github.com/apache/datafusion/pull/11047
    #[allow(dead_code)]
    fn use_execution_plan_as_trait_object(plan: &dyn ExecutionPlan) {
        let _ = plan.name();
    }
}

// pub mod test;