/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/union.rs
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Some of these functions reference the Postgres documentation
// or implementation to ensure compatibility and are subject to
// the Postgres license.

//! The Union operator combines multiple inputs with the same schema

use std::borrow::Borrow;
use std::pin::Pin;
use std::task::{Context, Poll};
use std::{any::Any, sync::Arc};

use super::{
    execution_mode_from_children,
    metrics::{ExecutionPlanMetricsSet, MetricsSet},
    ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan,
    ExecutionPlanProperties, Partitioning, PlanProperties, RecordBatchStream,
    SendableRecordBatchStream, Statistics,
};
use crate::metrics::BaselineMetrics;
use crate::stream::ObservedStream;

use arrow::datatypes::{Field, Schema, SchemaRef};
use arrow::record_batch::RecordBatch;
use datafusion_common::stats::Precision;
use datafusion_common::{exec_err, internal_err, Result};
use datafusion_execution::TaskContext;
use datafusion_physical_expr::{calculate_union, EquivalenceProperties};

use futures::Stream;
use itertools::Itertools;
use log::{debug, trace, warn};
use tokio::macros::support::thread_rng_n;
/// `UnionExec`: `UNION ALL` execution plan.
///
/// `UnionExec` combines multiple inputs with the same schema by
/// concatenating the partitions. It does not mix or copy data within
/// or across partitions. Thus if the input partitions are sorted, the
/// output partitions of the union are also sorted.
///
/// For example, given a `UnionExec` of two inputs, with `N`
/// partitions, and `M` partitions, there will be `N+M` output
/// partitions. The first `N` output partitions are from Input 1
/// partitions, and the next `M` output partitions are from Input 2.
///
/// ```text
///                       ▲       ▲           ▲         ▲
///                       │       │           │         │
///      Output           │  ...  │           │         │
///    Partitions         │0      │N-1        │ N       │N+M-1
/// (passes through  ┌────┴───────┴───────────┴─────────┴───┐
///  the N+M input   │              UnionExec               │
///   partitions)    │                                      │
///                  └──────────────────────────────────────┘
///                                      ▲
///                                      │
///                                      │
///       Input           ┌────────┬─────┴────┬──────────┐
///     Partitions        │ ...    │          │  ...     │
///                     0 │        │ N-1      │ 0        │ M-1
///                  ┌────┴────────┴───┐  ┌───┴──────────┴───┐
///                  │                 │  │                  │
///                  │                 │  │                  │
///                  │                 │  │                  │
///                  │                 │  │                  │
///                  │                 │  │                  │
///                  │                 │  │                  │
///                  │Input 1          │  │Input 2           │
///                  └─────────────────┘  └──────────────────┘
/// ```
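///
/// # Example (illustrative)
///
/// A minimal sketch of building a union; `input_a` and `input_b` are
/// hypothetical `Arc<dyn ExecutionPlan>` values with identical schemas,
/// not part of this module:
///
/// ```ignore
/// // If `input_a` has 2 partitions and `input_b` has 3, the union
/// // passes all 2 + 3 = 5 partitions through unchanged.
/// let union = UnionExec::new(vec![input_a, input_b]);
/// assert_eq!(
///     union.properties().output_partitioning().partition_count(),
///     5
/// );
/// ```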
#[derive(Debug)]
pub struct UnionExec {
    /// Input execution plans
    inputs: Vec<Arc<dyn ExecutionPlan>>,
    /// Execution metrics
    metrics: ExecutionPlanMetricsSet,
    /// Cache holding plan properties like equivalences, output partitioning etc.
    cache: PlanProperties,
}

impl UnionExec {
    /// Create a new UnionExec
    pub fn new(inputs: Vec<Arc<dyn ExecutionPlan>>) -> Self {
        let schema = union_schema(&inputs);
        // The schema of the inputs and the union schema are consistent when:
        // - They have the same number of fields, and
        // - Their fields have the same types at the same indices.
        // Here, we know that the schemas are consistent, so the call below
        // cannot return an error.
        let cache = Self::compute_properties(&inputs, schema).unwrap();
        UnionExec {
            inputs,
            metrics: ExecutionPlanMetricsSet::new(),
            cache,
        }
    }

    /// Get inputs of the execution plan
    pub fn inputs(&self) -> &Vec<Arc<dyn ExecutionPlan>> {
        &self.inputs
    }

    /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
    fn compute_properties(
        inputs: &[Arc<dyn ExecutionPlan>],
        schema: SchemaRef,
    ) -> Result<PlanProperties> {
        // Calculate equivalence properties:
        let children_eqps = inputs
            .iter()
            .map(|child| child.equivalence_properties().clone())
            .collect::<Vec<_>>();
        let eq_properties = calculate_union(children_eqps, schema)?;

        // Calculate output partitioning; i.e. sum output partitions of the inputs.
        let num_partitions = inputs
            .iter()
            .map(|plan| plan.output_partitioning().partition_count())
            .sum();
        let output_partitioning = Partitioning::UnknownPartitioning(num_partitions);

        // Determine execution mode:
        let mode = execution_mode_from_children(inputs.iter());

        Ok(PlanProperties::new(
            eq_properties,
            output_partitioning,
            mode,
        ))
    }
}

impl DisplayAs for UnionExec {
    fn fmt_as(
        &self,
        t: DisplayFormatType,
        f: &mut std::fmt::Formatter,
    ) -> std::fmt::Result {
        match t {
            DisplayFormatType::Default | DisplayFormatType::Verbose => {
                write!(f, "UnionExec")
            }
        }
    }
}

impl ExecutionPlan for UnionExec {
    fn name(&self) -> &'static str {
        "UnionExec"
    }

    /// Return a reference to Any that can be used for downcasting
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn properties(&self) -> &PlanProperties {
        &self.cache
    }

    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
        self.inputs.iter().collect()
    }

    fn maintains_input_order(&self) -> Vec<bool> {
        // If the Union has an output ordering, it maintains at least one
        // child's ordering (i.e. the meet).
        // For instance, assume that the first child is SortExpr('a','b','c'),
        // the second child is SortExpr('a','b') and the third child is
        // SortExpr('a','b'). The output ordering would be SortExpr('a','b'),
        // which is the "meet" of all input orderings. In this example, this
        // function will return vec![false, true, true], indicating that we
        // preserve the orderings for the 2nd and the 3rd children.
        if let Some(output_ordering) = self.properties().output_ordering() {
            self.inputs()
                .iter()
                .map(|child| {
                    if let Some(child_ordering) = child.output_ordering() {
                        output_ordering.len() == child_ordering.len()
                    } else {
                        false
                    }
                })
                .collect()
        } else {
            vec![false; self.inputs().len()]
        }
    }

    fn with_new_children(
        self: Arc<Self>,
        children: Vec<Arc<dyn ExecutionPlan>>,
    ) -> Result<Arc<dyn ExecutionPlan>> {
        Ok(Arc::new(UnionExec::new(children)))
    }

    fn execute(
        &self,
        mut partition: usize,
        context: Arc<TaskContext>,
    ) -> Result<SendableRecordBatchStream> {
        trace!("Start UnionExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id());
        let baseline_metrics = BaselineMetrics::new(&self.metrics, partition);
        // record the tiny amount of work done in this function so
        // elapsed_compute is reported as non zero
        let elapsed_compute = baseline_metrics.elapsed_compute().clone();
        let _timer = elapsed_compute.timer(); // record on drop

        // find partition to execute
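        // E.g. if the first input has 4 partitions and the second has 5,
        // output partition 6 falls past the first input (6 >= 4), so it
        // maps to partition 6 - 4 = 2 of the second input.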
        for input in self.inputs.iter() {
            // Check whether the requested partition belongs to the current input
            if partition < input.output_partitioning().partition_count() {
                let stream = input.execute(partition, context)?;
                debug!("Found a Union partition to execute");
                return Ok(Box::pin(ObservedStream::new(stream, baseline_metrics)));
            } else {
                partition -= input.output_partitioning().partition_count();
            }
        }

        warn!("Error in Union: Partition {} not found", partition);

        exec_err!("Partition {partition} not found in Union")
    }

    fn metrics(&self) -> Option<MetricsSet> {
        Some(self.metrics.clone_inner())
    }

    fn statistics(&self) -> Result<Statistics> {
        let stats = self
            .inputs
            .iter()
            .map(|stat| stat.statistics())
            .collect::<Result<Vec<_>>>()?;

        Ok(stats
            .into_iter()
            .reduce(stats_union)
            .unwrap_or_else(|| Statistics::new_unknown(&self.schema())))
    }

    fn benefits_from_input_partitioning(&self) -> Vec<bool> {
        vec![false; self.children().len()]
    }

    fn supports_limit_pushdown(&self) -> bool {
        true
    }
}

/// Combines multiple input streams by interleaving them.
///
/// This only works if all inputs have the same hash-partitioning.
///
/// # Data Flow
/// ```text
/// +---------+
/// |         |---+
/// | Input 1 |   |
/// |         |-------------+
/// +---------+   |         |
///               |         |         +---------+
///               +------------------>|         |
///                 +---------------->| Combine |-->
///                 | +-------------->|         |
///                 | |     |         +---------+
/// +---------+     | |     |
/// |         |-----+ |     |
/// | Input 2 |       |     |
/// |         |---------------+
/// +---------+       |     | |
///                   |     | |       +---------+
///                   |     +-------->|         |
///                   |       +------>| Combine |-->
///                   |         +---->|         |
///                   |         |     +---------+
/// +---------+       |         |
/// |         |-------+         |
/// | Input 3 |                 |
/// |         |-----------------+
/// +---------+
/// ```
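///
/// # Example (illustrative)
///
/// A minimal sketch, assuming hypothetical `plan_a` and `plan_b` values of
/// type `Arc<dyn ExecutionPlan>` that share the same `Hash` output
/// partitioning:
///
/// ```ignore
/// let inputs = vec![plan_a, plan_b];
/// assert!(can_interleave(inputs.iter()));
/// // Unlike `UnionExec`, the output partition count stays that of a
/// // single input rather than the sum over all inputs.
/// let interleave = InterleaveExec::try_new(inputs)?;
/// ```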
#[derive(Debug)]
pub struct InterleaveExec {
    /// Input execution plans
    inputs: Vec<Arc<dyn ExecutionPlan>>,
    /// Execution metrics
    metrics: ExecutionPlanMetricsSet,
    /// Cache holding plan properties like equivalences, output partitioning etc.
    cache: PlanProperties,
}

impl InterleaveExec {
    /// Create a new InterleaveExec
    pub fn try_new(inputs: Vec<Arc<dyn ExecutionPlan>>) -> Result<Self> {
        if !can_interleave(inputs.iter()) {
            return internal_err!(
                "Not all InterleaveExec children have a consistent hash partitioning"
            );
        }
        let cache = Self::compute_properties(&inputs);
        Ok(InterleaveExec {
            inputs,
            metrics: ExecutionPlanMetricsSet::new(),
            cache,
        })
    }

    /// Get inputs of the execution plan
    pub fn inputs(&self) -> &Vec<Arc<dyn ExecutionPlan>> {
        &self.inputs
    }

    /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
    fn compute_properties(inputs: &[Arc<dyn ExecutionPlan>]) -> PlanProperties {
        let schema = union_schema(inputs);
        let eq_properties = EquivalenceProperties::new(schema);
        // Get output partitioning:
        let output_partitioning = inputs[0].output_partitioning().clone();
        // Determine execution mode:
        let mode = execution_mode_from_children(inputs.iter());

        PlanProperties::new(eq_properties, output_partitioning, mode)
    }
}

impl DisplayAs for InterleaveExec {
    fn fmt_as(
        &self,
        t: DisplayFormatType,
        f: &mut std::fmt::Formatter,
    ) -> std::fmt::Result {
        match t {
            DisplayFormatType::Default | DisplayFormatType::Verbose => {
                write!(f, "InterleaveExec")
            }
        }
    }
}

impl ExecutionPlan for InterleaveExec {
    fn name(&self) -> &'static str {
        "InterleaveExec"
    }

    /// Return a reference to Any that can be used for downcasting
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn properties(&self) -> &PlanProperties {
        &self.cache
    }

    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
        self.inputs.iter().collect()
    }

    fn maintains_input_order(&self) -> Vec<bool> {
        vec![false; self.inputs().len()]
    }

    fn with_new_children(
        self: Arc<Self>,
        children: Vec<Arc<dyn ExecutionPlan>>,
    ) -> Result<Arc<dyn ExecutionPlan>> {
        // New children are no longer interleavable, which might indicate a bug
        // in an optimizer rewrite.
        if !can_interleave(children.iter()) {
            return internal_err!(
                "Can not create InterleaveExec: new children can not be interleaved"
            );
        }
        Ok(Arc::new(InterleaveExec::try_new(children)?))
    }

    fn execute(
        &self,
        partition: usize,
        context: Arc<TaskContext>,
    ) -> Result<SendableRecordBatchStream> {
        trace!("Start InterleaveExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id());
        let baseline_metrics = BaselineMetrics::new(&self.metrics, partition);
        // record the tiny amount of work done in this function so
        // elapsed_compute is reported as non zero
        let elapsed_compute = baseline_metrics.elapsed_compute().clone();
        let _timer = elapsed_compute.timer(); // record on drop

        let mut input_stream_vec = vec![];
        for input in self.inputs.iter() {
            if partition < input.output_partitioning().partition_count() {
                input_stream_vec.push(input.execute(partition, Arc::clone(&context))?);
            } else {
                // Could not find a partition to execute
                break;
            }
        }
        if input_stream_vec.len() == self.inputs.len() {
            let stream = Box::pin(CombinedRecordBatchStream::new(
                self.schema(),
                input_stream_vec,
            ));
            return Ok(Box::pin(ObservedStream::new(stream, baseline_metrics)));
        }

        warn!("Error in InterleaveExec: Partition {} not found", partition);

        exec_err!("Partition {partition} not found in InterleaveExec")
    }

    fn metrics(&self) -> Option<MetricsSet> {
        Some(self.metrics.clone_inner())
    }

    fn statistics(&self) -> Result<Statistics> {
        let stats = self
            .inputs
            .iter()
            .map(|stat| stat.statistics())
            .collect::<Result<Vec<_>>>()?;

        Ok(stats
            .into_iter()
            .reduce(stats_union)
            .unwrap_or_else(|| Statistics::new_unknown(&self.schema())))
    }

    fn benefits_from_input_partitioning(&self) -> Vec<bool> {
        vec![false; self.children().len()]
    }
}

/// If all the input partitions have the same `Hash` partitioning spec as the
/// first input, the `InterleaveExec` is partition-aware.
///
/// This might be too strict when the input partitioning specs are compatible
/// but not exactly the same. For example, if one input has the spec
/// `Hash('a','b','c')` and another has the spec `Hash('a')`, it is safe to
/// derive the output partitioning with the spec `Hash('a','b','c')`.
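///
/// A minimal usage sketch (hypothetical `plan_a` and `plan_b`, both
/// `Arc<dyn ExecutionPlan>` values with the same `Hash` partitioning):
///
/// ```ignore
/// let plans = vec![plan_a, plan_b];
/// // Returns false for an empty input set or any non-hash partitioning.
/// let ok = can_interleave(plans.iter());
/// ```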
pub fn can_interleave<T: Borrow<Arc<dyn ExecutionPlan>>>(
    mut inputs: impl Iterator<Item = T>,
) -> bool {
    let Some(first) = inputs.next() else {
        return false;
    };

    let reference = first.borrow().output_partitioning();
    matches!(reference, Partitioning::Hash(_, _))
        && inputs
            .map(|plan| plan.borrow().output_partitioning().clone())
            .all(|partition| partition == *reference)
}

fn union_schema(inputs: &[Arc<dyn ExecutionPlan>]) -> SchemaRef {
    let fields: Vec<Field> = (0..inputs[0].schema().fields().len())
        .map(|i| {
            inputs
                .iter()
                .filter_map(|input| {
                    if input.schema().fields().len() > i {
                        let field = input.schema().field(i).clone();
                        let right_hand_metadata = inputs
                            .get(1)
                            .map(|right_input| {
                                right_input.schema().field(i).metadata().clone()
                            })
                            .unwrap_or_default();
                        let mut metadata = field.metadata().clone();
                        metadata.extend(right_hand_metadata);
                        Some(field.with_metadata(metadata))
                    } else {
                        None
                    }
                })
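                // The union's field is nullable if any input's field at this
                // index is nullable; prefer such a field variant when present.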
                .find_or_first(|f| f.is_nullable())
                .unwrap()
        })
        .collect();

    Arc::new(Schema::new_with_metadata(
        fields,
        inputs[0].schema().metadata().clone(),
    ))
}

/// CombinedRecordBatchStream can be used to combine a Vec of SendableRecordBatchStreams into one
struct CombinedRecordBatchStream {
    /// Schema wrapped by Arc
    schema: SchemaRef,
    /// Stream entries
    entries: Vec<SendableRecordBatchStream>,
}

impl CombinedRecordBatchStream {
    /// Create a CombinedRecordBatchStream
    pub fn new(schema: SchemaRef, entries: Vec<SendableRecordBatchStream>) -> Self {
        Self { schema, entries }
    }
}

impl RecordBatchStream for CombinedRecordBatchStream {
    fn schema(&self) -> SchemaRef {
        Arc::clone(&self.schema)
    }
}

impl Stream for CombinedRecordBatchStream {
    type Item = Result<RecordBatch>;

    fn poll_next(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
    ) -> Poll<Option<Self::Item>> {
        use Poll::*;

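        // Start polling at a random entry so that repeated polls do not
        // always favor the first input stream.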
        let start = thread_rng_n(self.entries.len() as u32) as usize;
        let mut idx = start;

        for _ in 0..self.entries.len() {
            let stream = self.entries.get_mut(idx).unwrap();

            match Pin::new(stream).poll_next(cx) {
                Ready(Some(val)) => return Ready(Some(val)),
                Ready(None) => {
                    // Remove the entry
                    self.entries.swap_remove(idx);

                    // Check if this was the last entry, if so the cursor needs
                    // to wrap
                    if idx == self.entries.len() {
                        idx = 0;
                    } else if idx < start && start <= self.entries.len() {
                        // The stream being swapped into the current index has
                        // already been polled, so skip it.
                        idx = idx.wrapping_add(1) % self.entries.len();
                    }
                }
                Pending => {
                    idx = idx.wrapping_add(1) % self.entries.len();
                }
            }
        }

        // If the list of entries is empty, then the stream is complete.
        if self.entries.is_empty() {
            Ready(None)
        } else {
            Pending
        }
    }
}

fn col_stats_union(
    mut left: ColumnStatistics,
    right: ColumnStatistics,
) -> ColumnStatistics {
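    // Distinct counts are not additive across the inputs (the same value may
    // appear on both sides), so the union's distinct count becomes unknown.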
    left.distinct_count = Precision::Absent;
    left.min_value = left.min_value.min(&right.min_value);
    left.max_value = left.max_value.max(&right.max_value);
    left.null_count = left.null_count.add(&right.null_count);

    left
}

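/// Merges the statistics of two union inputs: row counts and byte sizes add
/// up, while per-column statistics are merged via [`col_stats_union`]. For
/// example, `Precision::Exact(5)` rows combined with `Precision::Exact(7)`
/// rows yields `Precision::Exact(12)`.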
fn stats_union(mut left: Statistics, right: Statistics) -> Statistics {
    left.num_rows = left.num_rows.add(&right.num_rows);
    left.total_byte_size = left.total_byte_size.add(&right.total_byte_size);
    left.column_statistics = left
        .column_statistics
        .into_iter()
        .zip(right.column_statistics)
        .map(|(a, b)| col_stats_union(a, b))
        .collect::<Vec<_>>();
    left
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::collect;
    use crate::memory::MemoryExec;
    use crate::test;

    use arrow_schema::{DataType, SortOptions};
    use datafusion_common::ScalarValue;
    use datafusion_physical_expr::expressions::col;
    use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr};

    // Generate a schema which consists of 7 columns (a, b, c, d, e, f, g)
    fn create_test_schema() -> Result<SchemaRef> {
        let a = Field::new("a", DataType::Int32, true);
        let b = Field::new("b", DataType::Int32, true);
        let c = Field::new("c", DataType::Int32, true);
        let d = Field::new("d", DataType::Int32, true);
        let e = Field::new("e", DataType::Int32, true);
        let f = Field::new("f", DataType::Int32, true);
        let g = Field::new("g", DataType::Int32, true);
        let schema = Arc::new(Schema::new(vec![a, b, c, d, e, f, g]));

        Ok(schema)
    }

    // Convert each tuple to PhysicalSortExpr
    fn convert_to_sort_exprs(
        in_data: &[(&Arc<dyn PhysicalExpr>, SortOptions)],
    ) -> Vec<PhysicalSortExpr> {
        in_data
            .iter()
            .map(|(expr, options)| PhysicalSortExpr {
                expr: Arc::clone(*expr),
                options: *options,
            })
            .collect::<Vec<_>>()
    }

    #[tokio::test]
    async fn test_union_partitions() -> Result<()> {
        let task_ctx = Arc::new(TaskContext::default());

        // Create inputs with different partitioning
        let csv = test::scan_partitioned(4);
        let csv2 = test::scan_partitioned(5);

        let union_exec = Arc::new(UnionExec::new(vec![csv, csv2]));

        // Should have 9 partitions and 9 output batches
        assert_eq!(
            union_exec
                .properties()
                .output_partitioning()
                .partition_count(),
            9
        );

        let result: Vec<RecordBatch> = collect(union_exec, task_ctx).await?;
        assert_eq!(result.len(), 9);

        Ok(())
    }

    #[tokio::test]
    async fn test_stats_union() {
        let left = Statistics {
            num_rows: Precision::Exact(5),
            total_byte_size: Precision::Exact(23),
            column_statistics: vec![
                ColumnStatistics {
                    distinct_count: Precision::Exact(5),
                    max_value: Precision::Exact(ScalarValue::Int64(Some(21))),
                    min_value: Precision::Exact(ScalarValue::Int64(Some(-4))),
                    null_count: Precision::Exact(0),
                },
                ColumnStatistics {
                    distinct_count: Precision::Exact(1),
                    max_value: Precision::Exact(ScalarValue::from("x")),
                    min_value: Precision::Exact(ScalarValue::from("a")),
                    null_count: Precision::Exact(3),
                },
                ColumnStatistics {
                    distinct_count: Precision::Absent,
                    max_value: Precision::Exact(ScalarValue::Float32(Some(1.1))),
                    min_value: Precision::Exact(ScalarValue::Float32(Some(0.1))),
                    null_count: Precision::Absent,
                },
            ],
        };

        let right = Statistics {
            num_rows: Precision::Exact(7),
            total_byte_size: Precision::Exact(29),
            column_statistics: vec![
                ColumnStatistics {
                    distinct_count: Precision::Exact(3),
                    max_value: Precision::Exact(ScalarValue::Int64(Some(34))),
                    min_value: Precision::Exact(ScalarValue::Int64(Some(1))),
                    null_count: Precision::Exact(1),
                },
                ColumnStatistics {
                    distinct_count: Precision::Absent,
                    max_value: Precision::Exact(ScalarValue::from("c")),
                    min_value: Precision::Exact(ScalarValue::from("b")),
                    null_count: Precision::Absent,
                },
                ColumnStatistics {
                    distinct_count: Precision::Absent,
                    max_value: Precision::Absent,
                    min_value: Precision::Absent,
                    null_count: Precision::Absent,
                },
            ],
        };

        let result = stats_union(left, right);
        let expected = Statistics {
            num_rows: Precision::Exact(12),
            total_byte_size: Precision::Exact(52),
            column_statistics: vec![
                ColumnStatistics {
                    distinct_count: Precision::Absent,
                    max_value: Precision::Exact(ScalarValue::Int64(Some(34))),
                    min_value: Precision::Exact(ScalarValue::Int64(Some(-4))),
                    null_count: Precision::Exact(1),
                },
                ColumnStatistics {
                    distinct_count: Precision::Absent,
                    max_value: Precision::Exact(ScalarValue::from("x")),
                    min_value: Precision::Exact(ScalarValue::from("a")),
                    null_count: Precision::Absent,
                },
                ColumnStatistics {
                    distinct_count: Precision::Absent,
                    max_value: Precision::Absent,
                    min_value: Precision::Absent,
                    null_count: Precision::Absent,
                },
            ],
        };

        assert_eq!(result, expected);
    }

    #[tokio::test]
    async fn test_union_equivalence_properties() -> Result<()> {
        let schema = create_test_schema()?;
        let col_a = &col("a", &schema)?;
        let col_b = &col("b", &schema)?;
        let col_c = &col("c", &schema)?;
        let col_d = &col("d", &schema)?;
        let col_e = &col("e", &schema)?;
        let col_f = &col("f", &schema)?;
        let options = SortOptions::default();
        let test_cases = [
            //-----------TEST CASE 1----------//
            (
                // First child orderings
                vec![
                    // [a ASC, b ASC, f ASC]
                    vec![(col_a, options), (col_b, options), (col_f, options)],
                ],
                // Second child orderings
                vec![
                    // [a ASC, b ASC, c ASC]
                    vec![(col_a, options), (col_b, options), (col_c, options)],
                    // [a ASC, b ASC, f ASC]
                    vec![(col_a, options), (col_b, options), (col_f, options)],
                ],
                // Union output orderings
                vec![
                    // [a ASC, b ASC, f ASC]
                    vec![(col_a, options), (col_b, options), (col_f, options)],
                ],
            ),
            //-----------TEST CASE 2----------//
            (
                // First child orderings
                vec![
                    // [a ASC, b ASC, f ASC]
                    vec![(col_a, options), (col_b, options), (col_f, options)],
                    // [d ASC]
                    vec![(col_d, options)],
                ],
                // Second child orderings
                vec![
                    // [a ASC, b ASC, c ASC]
                    vec![(col_a, options), (col_b, options), (col_c, options)],
                    // [e ASC]
                    vec![(col_e, options)],
                ],
                // Union output orderings
                vec![
                    // [a ASC, b ASC]
                    vec![(col_a, options), (col_b, options)],
                ],
            ),
        ];

        for (
            test_idx,
            (first_child_orderings, second_child_orderings, union_orderings),
        ) in test_cases.iter().enumerate()
        {
            let first_orderings = first_child_orderings
                .iter()
                .map(|ordering| convert_to_sort_exprs(ordering))
                .collect::<Vec<_>>();
            let second_orderings = second_child_orderings
                .iter()
                .map(|ordering| convert_to_sort_exprs(ordering))
                .collect::<Vec<_>>();
            let union_expected_orderings = union_orderings
                .iter()
                .map(|ordering| convert_to_sort_exprs(ordering))
                .collect::<Vec<_>>();
            let child1 = Arc::new(
                MemoryExec::try_new(&[], Arc::clone(&schema), None)?
                    .with_sort_information(first_orderings),
            );
            let child2 = Arc::new(
                MemoryExec::try_new(&[], Arc::clone(&schema), None)?
                    .with_sort_information(second_orderings),
            );

            let mut union_expected_eq = EquivalenceProperties::new(Arc::clone(&schema));
            union_expected_eq.add_new_orderings(union_expected_orderings);

            let union = UnionExec::new(vec![child1, child2]);
            let union_eq_properties = union.properties().equivalence_properties();
            let err_msg = format!(
                "Error in test id: {:?}, test case: {:?}",
                test_idx, test_cases[test_idx]
            );
            assert_eq_properties_same(union_eq_properties, &union_expected_eq, err_msg);
        }
        Ok(())
    }

    fn assert_eq_properties_same(
        lhs: &EquivalenceProperties,
        rhs: &EquivalenceProperties,
        err_msg: String,
    ) {
        // Check whether the orderings are the same.
        let lhs_orderings = lhs.oeq_class();
        let rhs_orderings = &rhs.oeq_class.orderings;
        assert_eq!(lhs_orderings.len(), rhs_orderings.len(), "{}", err_msg);
        for rhs_ordering in rhs_orderings {
            assert!(lhs_orderings.contains(rhs_ordering), "{}", err_msg);
        }
    }
}