/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/windows/mod.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Physical expressions for window functions |
19 | | |
20 | | use std::borrow::Borrow; |
21 | | use std::sync::Arc; |
22 | | |
23 | | use crate::{ |
24 | | expressions::{ |
25 | | cume_dist, dense_rank, lag, lead, percent_rank, rank, Literal, NthValue, Ntile, |
26 | | PhysicalSortExpr, |
27 | | }, |
28 | | ExecutionPlan, ExecutionPlanProperties, InputOrderMode, PhysicalExpr, |
29 | | }; |
30 | | |
31 | | use arrow::datatypes::Schema; |
32 | | use arrow_schema::{DataType, Field, SchemaRef}; |
33 | | use datafusion_common::{ |
34 | | exec_datafusion_err, exec_err, DataFusionError, Result, ScalarValue, |
35 | | }; |
36 | | use datafusion_expr::{ |
37 | | BuiltInWindowFunction, PartitionEvaluator, ReversedUDWF, WindowFrame, |
38 | | WindowFunctionDefinition, WindowUDF, |
39 | | }; |
40 | | use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; |
41 | | use datafusion_physical_expr::equivalence::collapse_lex_req; |
42 | | use datafusion_physical_expr::{ |
43 | | reverse_order_bys, |
44 | | window::{BuiltInWindowFunctionExpr, SlidingAggregateWindowExpr}, |
45 | | ConstExpr, EquivalenceProperties, LexOrdering, PhysicalSortRequirement, |
46 | | }; |
47 | | use itertools::Itertools; |
48 | | |
49 | | mod bounded_window_agg_exec; |
50 | | mod utils; |
51 | | mod window_agg_exec; |
52 | | |
53 | | pub use bounded_window_agg_exec::BoundedWindowAggExec; |
54 | | use datafusion_functions_window_common::field::WindowUDFFieldArgs; |
55 | | use datafusion_physical_expr::expressions::Column; |
56 | | pub use datafusion_physical_expr::window::{ |
57 | | BuiltInWindowExpr, PlainAggregateWindowExpr, WindowExpr, |
58 | | }; |
59 | | use datafusion_physical_expr_common::sort_expr::LexRequirement; |
60 | | pub use window_agg_exec::WindowAggExec; |
61 | | |
62 | | /// Build field from window function and add it into schema |
63 | 0 | pub fn schema_add_window_field( |
64 | 0 | args: &[Arc<dyn PhysicalExpr>], |
65 | 0 | schema: &Schema, |
66 | 0 | window_fn: &WindowFunctionDefinition, |
67 | 0 | fn_name: &str, |
68 | 0 | ) -> Result<Arc<Schema>> { |
69 | 0 | let data_types = args |
70 | 0 | .iter() |
71 | 0 | .map(|e| Arc::clone(e).as_ref().data_type(schema)) |
72 | 0 | .collect::<Result<Vec<_>>>()?; |
73 | 0 | let nullability = args |
74 | 0 | .iter() |
75 | 0 | .map(|e| Arc::clone(e).as_ref().nullable(schema)) |
76 | 0 | .collect::<Result<Vec<_>>>()?; |
77 | 0 | let window_expr_return_type = |
78 | 0 | window_fn.return_type(&data_types, &nullability, fn_name)?; |
79 | 0 | let mut window_fields = schema |
80 | 0 | .fields() |
81 | 0 | .iter() |
82 | 0 | .map(|f| f.as_ref().clone()) |
83 | 0 | .collect_vec(); |
84 | 0 | // Skip extending schema for UDAF |
85 | 0 | if let WindowFunctionDefinition::AggregateUDF(_) = window_fn { |
86 | 0 | Ok(Arc::new(Schema::new(window_fields))) |
87 | | } else { |
88 | 0 | window_fields.extend_from_slice(&[Field::new( |
89 | 0 | fn_name, |
90 | 0 | window_expr_return_type, |
91 | 0 | false, |
92 | 0 | )]); |
93 | 0 | Ok(Arc::new(Schema::new(window_fields))) |
94 | | } |
95 | 0 | } |
96 | | |
97 | | /// Create a physical expression for window function |
98 | | #[allow(clippy::too_many_arguments)] |
99 | 2 | pub fn create_window_expr( |
100 | 2 | fun: &WindowFunctionDefinition, |
101 | 2 | name: String, |
102 | 2 | args: &[Arc<dyn PhysicalExpr>], |
103 | 2 | partition_by: &[Arc<dyn PhysicalExpr>], |
104 | 2 | order_by: &[PhysicalSortExpr], |
105 | 2 | window_frame: Arc<WindowFrame>, |
106 | 2 | input_schema: &Schema, |
107 | 2 | ignore_nulls: bool, |
108 | 2 | ) -> Result<Arc<dyn WindowExpr>> { |
109 | 2 | Ok(match fun { |
110 | 0 | WindowFunctionDefinition::BuiltInWindowFunction(fun) => { |
111 | 0 | Arc::new(BuiltInWindowExpr::new( |
112 | 0 | create_built_in_window_expr(fun, args, input_schema, name, ignore_nulls)?, |
113 | 0 | partition_by, |
114 | 0 | order_by, |
115 | 0 | window_frame, |
116 | | )) |
117 | | } |
118 | 2 | WindowFunctionDefinition::AggregateUDF(fun) => { |
119 | 2 | let aggregate = AggregateExprBuilder::new(Arc::clone(fun), args.to_vec()) |
120 | 2 | .schema(Arc::new(input_schema.clone())) |
121 | 2 | .alias(name) |
122 | 2 | .with_ignore_nulls(ignore_nulls) |
123 | 2 | .build()?0 ; |
124 | 2 | window_expr_from_aggregate_expr( |
125 | 2 | partition_by, |
126 | 2 | order_by, |
127 | 2 | window_frame, |
128 | 2 | aggregate, |
129 | 2 | ) |
130 | | } |
131 | | // TODO: Ordering not supported for Window UDFs yet |
132 | 0 | WindowFunctionDefinition::WindowUDF(fun) => Arc::new(BuiltInWindowExpr::new( |
133 | 0 | create_udwf_window_expr(fun, args, input_schema, name, ignore_nulls)?, |
134 | 0 | partition_by, |
135 | 0 | order_by, |
136 | 0 | window_frame, |
137 | | )), |
138 | | }) |
139 | 2 | } |
140 | | |
141 | | /// Creates an appropriate [`WindowExpr`] based on the window frame and |
142 | 2 | fn window_expr_from_aggregate_expr( |
143 | 2 | partition_by: &[Arc<dyn PhysicalExpr>], |
144 | 2 | order_by: &[PhysicalSortExpr], |
145 | 2 | window_frame: Arc<WindowFrame>, |
146 | 2 | aggregate: AggregateFunctionExpr, |
147 | 2 | ) -> Arc<dyn WindowExpr> { |
148 | 2 | // Is there a potentially unlimited sized window frame? |
149 | 2 | let unbounded_window = window_frame.start_bound.is_unbounded(); |
150 | 2 | |
151 | 2 | if !unbounded_window { |
152 | 1 | Arc::new(SlidingAggregateWindowExpr::new( |
153 | 1 | aggregate, |
154 | 1 | partition_by, |
155 | 1 | order_by, |
156 | 1 | window_frame, |
157 | 1 | )) |
158 | | } else { |
159 | 1 | Arc::new(PlainAggregateWindowExpr::new( |
160 | 1 | aggregate, |
161 | 1 | partition_by, |
162 | 1 | order_by, |
163 | 1 | window_frame, |
164 | 1 | )) |
165 | | } |
166 | 2 | } |
167 | | |
168 | 0 | fn get_scalar_value_from_args( |
169 | 0 | args: &[Arc<dyn PhysicalExpr>], |
170 | 0 | index: usize, |
171 | 0 | ) -> Result<Option<ScalarValue>> { |
172 | 0 | Ok(if let Some(field) = args.get(index) { |
173 | 0 | let tmp = field |
174 | 0 | .as_any() |
175 | 0 | .downcast_ref::<Literal>() |
176 | 0 | .ok_or_else(|| DataFusionError::NotImplemented( |
177 | 0 | format!("There is only support Literal types for field at idx: {index} in Window Function"), |
178 | 0 | ))? |
179 | 0 | .value() |
180 | 0 | .clone(); |
181 | 0 | Some(tmp) |
182 | | } else { |
183 | 0 | None |
184 | | }) |
185 | 0 | } |
186 | | |
187 | 0 | fn get_signed_integer(value: ScalarValue) -> Result<i64> { |
188 | 0 | if value.is_null() { |
189 | 0 | return Ok(0); |
190 | 0 | } |
191 | 0 |
|
192 | 0 | if !value.data_type().is_integer() { |
193 | 0 | return exec_err!("Expected an integer value"); |
194 | 0 | } |
195 | 0 |
|
196 | 0 | value.cast_to(&DataType::Int64)?.try_into() |
197 | 0 | } |
198 | | |
199 | 0 | fn get_unsigned_integer(value: ScalarValue) -> Result<u64> { |
200 | 0 | if value.is_null() { |
201 | 0 | return Ok(0); |
202 | 0 | } |
203 | 0 |
|
204 | 0 | if !value.data_type().is_integer() { |
205 | 0 | return exec_err!("Expected an integer value"); |
206 | 0 | } |
207 | 0 |
|
208 | 0 | value.cast_to(&DataType::UInt64)?.try_into() |
209 | 0 | } |
210 | | |
211 | 0 | fn get_casted_value( |
212 | 0 | default_value: Option<ScalarValue>, |
213 | 0 | dtype: &DataType, |
214 | 0 | ) -> Result<ScalarValue> { |
215 | 0 | match default_value { |
216 | 0 | Some(v) if !v.data_type().is_null() => v.cast_to(dtype), |
217 | | // If None or Null datatype |
218 | 0 | _ => ScalarValue::try_from(dtype), |
219 | | } |
220 | 0 | } |
221 | | |
222 | 0 | fn create_built_in_window_expr( |
223 | 0 | fun: &BuiltInWindowFunction, |
224 | 0 | args: &[Arc<dyn PhysicalExpr>], |
225 | 0 | input_schema: &Schema, |
226 | 0 | name: String, |
227 | 0 | ignore_nulls: bool, |
228 | 0 | ) -> Result<Arc<dyn BuiltInWindowFunctionExpr>> { |
229 | | // derive the output datatype from incoming schema |
230 | 0 | let out_data_type: &DataType = input_schema.field_with_name(&name)?.data_type(); |
231 | 0 |
|
232 | 0 | Ok(match fun { |
233 | 0 | BuiltInWindowFunction::Rank => Arc::new(rank(name, out_data_type)), |
234 | 0 | BuiltInWindowFunction::DenseRank => Arc::new(dense_rank(name, out_data_type)), |
235 | 0 | BuiltInWindowFunction::PercentRank => Arc::new(percent_rank(name, out_data_type)), |
236 | 0 | BuiltInWindowFunction::CumeDist => Arc::new(cume_dist(name, out_data_type)), |
237 | | BuiltInWindowFunction::Ntile => { |
238 | 0 | let n = get_scalar_value_from_args(args, 0)?.ok_or_else(|| { |
239 | 0 | DataFusionError::Execution( |
240 | 0 | "NTILE requires a positive integer".to_string(), |
241 | 0 | ) |
242 | 0 | })?; |
243 | | |
244 | 0 | if n.is_null() { |
245 | 0 | return exec_err!("NTILE requires a positive integer, but finds NULL"); |
246 | 0 | } |
247 | 0 |
|
248 | 0 | if n.is_unsigned() { |
249 | 0 | let n = get_unsigned_integer(n)?; |
250 | 0 | Arc::new(Ntile::new(name, n, out_data_type)) |
251 | | } else { |
252 | 0 | let n: i64 = get_signed_integer(n)?; |
253 | 0 | if n <= 0 { |
254 | 0 | return exec_err!("NTILE requires a positive integer"); |
255 | 0 | } |
256 | 0 | Arc::new(Ntile::new(name, n as u64, out_data_type)) |
257 | | } |
258 | | } |
259 | | BuiltInWindowFunction::Lag => { |
260 | 0 | let arg = Arc::clone(&args[0]); |
261 | 0 | let shift_offset = get_scalar_value_from_args(args, 1)? |
262 | 0 | .map(get_signed_integer) |
263 | 0 | .map_or(Ok(None), |v| v.map(Some))?; |
264 | 0 | let default_value = |
265 | 0 | get_casted_value(get_scalar_value_from_args(args, 2)?, out_data_type)?; |
266 | 0 | Arc::new(lag( |
267 | 0 | name, |
268 | 0 | out_data_type.clone(), |
269 | 0 | arg, |
270 | 0 | shift_offset, |
271 | 0 | default_value, |
272 | 0 | ignore_nulls, |
273 | 0 | )) |
274 | | } |
275 | | BuiltInWindowFunction::Lead => { |
276 | 0 | let arg = Arc::clone(&args[0]); |
277 | 0 | let shift_offset = get_scalar_value_from_args(args, 1)? |
278 | 0 | .map(get_signed_integer) |
279 | 0 | .map_or(Ok(None), |v| v.map(Some))?; |
280 | 0 | let default_value = |
281 | 0 | get_casted_value(get_scalar_value_from_args(args, 2)?, out_data_type)?; |
282 | 0 | Arc::new(lead( |
283 | 0 | name, |
284 | 0 | out_data_type.clone(), |
285 | 0 | arg, |
286 | 0 | shift_offset, |
287 | 0 | default_value, |
288 | 0 | ignore_nulls, |
289 | 0 | )) |
290 | | } |
291 | | BuiltInWindowFunction::NthValue => { |
292 | 0 | let arg = Arc::clone(&args[0]); |
293 | 0 | let n = get_signed_integer( |
294 | 0 | args[1] |
295 | 0 | .as_any() |
296 | 0 | .downcast_ref::<Literal>() |
297 | 0 | .ok_or_else(|| { |
298 | 0 | exec_datafusion_err!("Expected a signed integer literal for the second argument of nth_value, got {}", args[1]) |
299 | 0 | })? |
300 | 0 | .value() |
301 | 0 | .clone(), |
302 | 0 | )?; |
303 | 0 | Arc::new(NthValue::nth( |
304 | 0 | name, |
305 | 0 | arg, |
306 | 0 | out_data_type.clone(), |
307 | 0 | n, |
308 | 0 | ignore_nulls, |
309 | 0 | )?) |
310 | | } |
311 | | BuiltInWindowFunction::FirstValue => { |
312 | 0 | let arg = Arc::clone(&args[0]); |
313 | 0 | Arc::new(NthValue::first( |
314 | 0 | name, |
315 | 0 | arg, |
316 | 0 | out_data_type.clone(), |
317 | 0 | ignore_nulls, |
318 | 0 | )) |
319 | | } |
320 | | BuiltInWindowFunction::LastValue => { |
321 | 0 | let arg = Arc::clone(&args[0]); |
322 | 0 | Arc::new(NthValue::last( |
323 | 0 | name, |
324 | 0 | arg, |
325 | 0 | out_data_type.clone(), |
326 | 0 | ignore_nulls, |
327 | 0 | )) |
328 | | } |
329 | | }) |
330 | 0 | } |
331 | | |
332 | | /// Creates a `BuiltInWindowFunctionExpr` suitable for a user defined window function |
333 | 0 | fn create_udwf_window_expr( |
334 | 0 | fun: &Arc<WindowUDF>, |
335 | 0 | args: &[Arc<dyn PhysicalExpr>], |
336 | 0 | input_schema: &Schema, |
337 | 0 | name: String, |
338 | 0 | ignore_nulls: bool, |
339 | 0 | ) -> Result<Arc<dyn BuiltInWindowFunctionExpr>> { |
340 | | // need to get the types into an owned vec for some reason |
341 | 0 | let input_types: Vec<_> = args |
342 | 0 | .iter() |
343 | 0 | .map(|arg| arg.data_type(input_schema)) |
344 | 0 | .collect::<Result<_>>()?; |
345 | | |
346 | 0 | Ok(Arc::new(WindowUDFExpr { |
347 | 0 | fun: Arc::clone(fun), |
348 | 0 | args: args.to_vec(), |
349 | 0 | input_types, |
350 | 0 | name, |
351 | 0 | is_reversed: false, |
352 | 0 | ignore_nulls, |
353 | 0 | })) |
354 | 0 | } |
355 | | |
356 | | /// Implements [`BuiltInWindowFunctionExpr`] for [`WindowUDF`] |
357 | | #[derive(Clone, Debug)] |
358 | | struct WindowUDFExpr { |
359 | | fun: Arc<WindowUDF>, |
360 | | args: Vec<Arc<dyn PhysicalExpr>>, |
361 | | /// Display name |
362 | | name: String, |
363 | | /// Types of input expressions |
364 | | input_types: Vec<DataType>, |
365 | | /// This is set to `true` only if the user-defined window function |
366 | | /// expression supports evaluation in reverse order, and the |
367 | | /// evaluation order is reversed. |
368 | | is_reversed: bool, |
369 | | /// Set to `true` if `IGNORE NULLS` is defined, `false` otherwise. |
370 | | ignore_nulls: bool, |
371 | | } |
372 | | |
373 | | impl BuiltInWindowFunctionExpr for WindowUDFExpr { |
374 | 0 | fn as_any(&self) -> &dyn std::any::Any { |
375 | 0 | self |
376 | 0 | } |
377 | | |
378 | 0 | fn field(&self) -> Result<Field> { |
379 | 0 | self.fun |
380 | 0 | .field(WindowUDFFieldArgs::new(&self.input_types, &self.name)) |
381 | 0 | } |
382 | | |
383 | 0 | fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> { |
384 | 0 | self.args.clone() |
385 | 0 | } |
386 | | |
387 | 0 | fn create_evaluator(&self) -> Result<Box<dyn PartitionEvaluator>> { |
388 | 0 | self.fun.partition_evaluator_factory() |
389 | 0 | } |
390 | | |
391 | 0 | fn name(&self) -> &str { |
392 | 0 | &self.name |
393 | 0 | } |
394 | | |
395 | 0 | fn reverse_expr(&self) -> Option<Arc<dyn BuiltInWindowFunctionExpr>> { |
396 | 0 | match self.fun.reverse_expr() { |
397 | 0 | ReversedUDWF::Identical => Some(Arc::new(self.clone())), |
398 | 0 | ReversedUDWF::NotSupported => None, |
399 | 0 | ReversedUDWF::Reversed(fun) => Some(Arc::new(WindowUDFExpr { |
400 | 0 | fun, |
401 | 0 | args: self.args.clone(), |
402 | 0 | name: self.name.clone(), |
403 | 0 | input_types: self.input_types.clone(), |
404 | 0 | is_reversed: !self.is_reversed, |
405 | 0 | ignore_nulls: self.ignore_nulls, |
406 | 0 | })), |
407 | | } |
408 | 0 | } |
409 | | |
410 | 0 | fn get_result_ordering(&self, schema: &SchemaRef) -> Option<PhysicalSortExpr> { |
411 | 0 | self.fun |
412 | 0 | .sort_options() |
413 | 0 | .zip(schema.column_with_name(self.name())) |
414 | 0 | .map(|(options, (idx, field))| { |
415 | 0 | let expr = Arc::new(Column::new(field.name(), idx)); |
416 | 0 | PhysicalSortExpr { expr, options } |
417 | 0 | }) |
418 | 0 | } |
419 | | } |
420 | | |
421 | 4 | pub(crate) fn calc_requirements< |
422 | 4 | T: Borrow<Arc<dyn PhysicalExpr>>, |
423 | 4 | S: Borrow<PhysicalSortExpr>, |
424 | 4 | >( |
425 | 4 | partition_by_exprs: impl IntoIterator<Item = T>, |
426 | 4 | orderby_sort_exprs: impl IntoIterator<Item = S>, |
427 | 4 | ) -> Option<LexRequirement> { |
428 | 4 | let mut sort_reqs = LexRequirement::new( |
429 | 4 | partition_by_exprs |
430 | 4 | .into_iter() |
431 | 5 | .map(|partition_by| { |
432 | 5 | PhysicalSortRequirement::new(Arc::clone(partition_by.borrow()), None) |
433 | 5 | }) |
434 | 4 | .collect::<Vec<_>>(), |
435 | 4 | ); |
436 | 6 | for element in orderby_sort_exprs.into_iter()4 { |
437 | 6 | let PhysicalSortExpr { expr, options } = element.borrow(); |
438 | 9 | if !sort_reqs.iter().any(|e| e.expr.eq(expr))6 { |
439 | 4 | sort_reqs.push(PhysicalSortRequirement::new( |
440 | 4 | Arc::clone(expr), |
441 | 4 | Some(*options), |
442 | 4 | )); |
443 | 4 | }2 |
444 | | } |
445 | | // Convert empty result to None. Otherwise wrap result inside Some() |
446 | 4 | (!sort_reqs.is_empty()).then_some(sort_reqs) |
447 | 4 | } |
448 | | |
449 | | /// This function calculates the indices such that when partition by expressions reordered with the indices |
450 | | /// resulting expressions define a preset for existing ordering. |
451 | | /// For instance, if input is ordered by a, b, c and PARTITION BY b, a is used, |
452 | | /// this vector will be [1, 0]. It means that when we iterate b, a columns with the order [1, 0] |
453 | | /// resulting vector (a, b) is a preset of the existing ordering (a, b, c). |
454 | 52 | pub fn get_ordered_partition_by_indices( |
455 | 52 | partition_by_exprs: &[Arc<dyn PhysicalExpr>], |
456 | 52 | input: &Arc<dyn ExecutionPlan>, |
457 | 52 | ) -> Vec<usize> { |
458 | 52 | let (_, indices) = input |
459 | 52 | .equivalence_properties() |
460 | 52 | .find_longest_permutation(partition_by_exprs); |
461 | 52 | indices |
462 | 52 | } |
463 | | |
464 | 3 | pub(crate) fn get_partition_by_sort_exprs( |
465 | 3 | input: &Arc<dyn ExecutionPlan>, |
466 | 3 | partition_by_exprs: &[Arc<dyn PhysicalExpr>], |
467 | 3 | ordered_partition_by_indices: &[usize], |
468 | 3 | ) -> Result<LexOrdering> { |
469 | 3 | let ordered_partition_exprs = ordered_partition_by_indices |
470 | 3 | .iter() |
471 | 3 | .map(|idx| Arc::clone(&partition_by_exprs[*idx])0 ) |
472 | 3 | .collect::<Vec<_>>(); |
473 | 3 | // Make sure ordered section doesn't move over the partition by expression |
474 | 3 | assert!(ordered_partition_by_indices.len() <= partition_by_exprs.len()); |
475 | 3 | let (ordering, _) = input |
476 | 3 | .equivalence_properties() |
477 | 3 | .find_longest_permutation(&ordered_partition_exprs); |
478 | 3 | if ordering.len() == ordered_partition_exprs.len() { |
479 | 3 | Ok(ordering) |
480 | | } else { |
481 | 0 | exec_err!("Expects PARTITION BY expression to be ordered") |
482 | | } |
483 | 3 | } |
484 | | |
485 | 3 | pub(crate) fn window_equivalence_properties( |
486 | 3 | schema: &SchemaRef, |
487 | 3 | input: &Arc<dyn ExecutionPlan>, |
488 | 3 | window_expr: &[Arc<dyn WindowExpr>], |
489 | 3 | ) -> EquivalenceProperties { |
490 | 3 | // We need to update the schema, so we can not directly use |
491 | 3 | // `input.equivalence_properties()`. |
492 | 3 | let mut window_eq_properties = EquivalenceProperties::new(Arc::clone(schema)) |
493 | 3 | .extend(input.equivalence_properties().clone()); |
494 | | |
495 | 8 | for expr5 in window_expr { |
496 | 3 | if let Some(builtin_window_expr) = |
497 | 5 | expr.as_any().downcast_ref::<BuiltInWindowExpr>() |
498 | 3 | { |
499 | 3 | builtin_window_expr.add_equal_orderings(&mut window_eq_properties); |
500 | 3 | }2 |
501 | | } |
502 | 3 | window_eq_properties |
503 | 3 | } |
504 | | |
505 | | /// Constructs the best-fitting windowing operator (a `WindowAggExec` or a |
506 | | /// `BoundedWindowExec`) for the given `input` according to the specifications |
507 | | /// of `window_exprs` and `physical_partition_keys`. Here, best-fitting means |
508 | | /// not requiring additional sorting and/or partitioning for the given input. |
509 | | /// - A return value of `None` represents that there is no way to construct a |
510 | | /// windowing operator that doesn't need additional sorting/partitioning for |
511 | | /// the given input. Existing ordering should be changed to run the given |
512 | | /// windowing operation. |
513 | | /// - A `Some(window exec)` value contains the optimal windowing operator (a |
514 | | /// `WindowAggExec` or a `BoundedWindowExec`) for the given input. |
515 | 0 | pub fn get_best_fitting_window( |
516 | 0 | window_exprs: &[Arc<dyn WindowExpr>], |
517 | 0 | input: &Arc<dyn ExecutionPlan>, |
518 | 0 | // These are the partition keys used during repartitioning. |
519 | 0 | // They are either the same with `window_expr`'s PARTITION BY columns, |
520 | 0 | // or it is empty if partitioning is not desirable for this windowing operator. |
521 | 0 | physical_partition_keys: &[Arc<dyn PhysicalExpr>], |
522 | 0 | ) -> Result<Option<Arc<dyn ExecutionPlan>>> { |
523 | 0 | // Contains at least one window expr and all of the partition by and order by sections |
524 | 0 | // of the window_exprs are same. |
525 | 0 | let partitionby_exprs = window_exprs[0].partition_by(); |
526 | 0 | let orderby_keys = window_exprs[0].order_by(); |
527 | 0 | let (should_reverse, input_order_mode) = |
528 | 0 | if let Some((should_reverse, input_order_mode)) = |
529 | 0 | get_window_mode(partitionby_exprs, orderby_keys, input) |
530 | | { |
531 | 0 | (should_reverse, input_order_mode) |
532 | | } else { |
533 | 0 | return Ok(None); |
534 | | }; |
535 | 0 | let is_unbounded = input.execution_mode().is_unbounded(); |
536 | 0 | if !is_unbounded && input_order_mode != InputOrderMode::Sorted { |
537 | | // Executor has bounded input and `input_order_mode` is not `InputOrderMode::Sorted` |
538 | | // in this case removing the sort is not helpful, return: |
539 | 0 | return Ok(None); |
540 | 0 | }; |
541 | | |
542 | 0 | let window_expr = if should_reverse { |
543 | 0 | if let Some(reversed_window_expr) = window_exprs |
544 | 0 | .iter() |
545 | 0 | .map(|e| e.get_reverse_expr()) |
546 | 0 | .collect::<Option<Vec<_>>>() |
547 | | { |
548 | 0 | reversed_window_expr |
549 | | } else { |
550 | | // Cannot take reverse of any of the window expr |
551 | | // In this case, with existing ordering window cannot be run |
552 | 0 | return Ok(None); |
553 | | } |
554 | | } else { |
555 | 0 | window_exprs.to_vec() |
556 | | }; |
557 | | |
558 | | // If all window expressions can run with bounded memory, choose the |
559 | | // bounded window variant: |
560 | 0 | if window_expr.iter().all(|e| e.uses_bounded_memory()) { |
561 | 0 | Ok(Some(Arc::new(BoundedWindowAggExec::try_new( |
562 | 0 | window_expr, |
563 | 0 | Arc::clone(input), |
564 | 0 | physical_partition_keys.to_vec(), |
565 | 0 | input_order_mode, |
566 | 0 | )?) as _)) |
567 | 0 | } else if input_order_mode != InputOrderMode::Sorted { |
568 | | // For `WindowAggExec` to work correctly PARTITION BY columns should be sorted. |
569 | | // Hence, if `input_order_mode` is not `Sorted` we should convert |
570 | | // input ordering such that it can work with `Sorted` (add `SortExec`). |
571 | | // Effectively `WindowAggExec` works only in `Sorted` mode. |
572 | 0 | Ok(None) |
573 | | } else { |
574 | 0 | Ok(Some(Arc::new(WindowAggExec::try_new( |
575 | 0 | window_expr, |
576 | 0 | Arc::clone(input), |
577 | 0 | physical_partition_keys.to_vec(), |
578 | 0 | )?) as _)) |
579 | | } |
580 | 0 | } |
581 | | |
582 | | /// Compares physical ordering (output ordering of the `input` operator) with |
583 | | /// `partitionby_exprs` and `orderby_keys` to decide whether existing ordering |
584 | | /// is sufficient to run the current window operator. |
585 | | /// - A `None` return value indicates that we can not remove the sort in question |
586 | | /// (input ordering is not sufficient to run current window executor). |
587 | | /// - A `Some((bool, InputOrderMode))` value indicates that the window operator |
588 | | /// can run with existing input ordering, so we can remove `SortExec` before it. |
589 | | /// |
590 | | /// The `bool` field in the return value represents whether we should reverse window |
591 | | /// operator to remove `SortExec` before it. The `InputOrderMode` field represents |
592 | | /// the mode this window operator should work in to accommodate the existing ordering. |
593 | 75 | pub fn get_window_mode( |
594 | 75 | partitionby_exprs: &[Arc<dyn PhysicalExpr>], |
595 | 75 | orderby_keys: &[PhysicalSortExpr], |
596 | 75 | input: &Arc<dyn ExecutionPlan>, |
597 | 75 | ) -> Option<(bool, InputOrderMode)> { |
598 | 75 | let input_eqs = input.equivalence_properties().clone(); |
599 | 75 | let mut partition_by_reqs: LexRequirement = LexRequirement::new(vec![]); |
600 | 75 | let (_, indices) = input_eqs.find_longest_permutation(partitionby_exprs); |
601 | 76 | vec![].extend(indices.iter().map(|&idx| PhysicalSortRequirement { |
602 | 76 | expr: Arc::clone(&partitionby_exprs[idx]), |
603 | 76 | options: None, |
604 | 76 | })); |
605 | 75 | partition_by_reqs |
606 | 75 | .inner |
607 | 76 | .extend(indices.iter().map(|&idx| PhysicalSortRequirement { |
608 | 76 | expr: Arc::clone(&partitionby_exprs[idx]), |
609 | 76 | options: None, |
610 | 76 | })); |
611 | 75 | // Treat partition by exprs as constant. During analysis of requirements are satisfied. |
612 | 75 | let const_exprs = partitionby_exprs.iter().map(ConstExpr::from); |
613 | 75 | let partition_by_eqs = input_eqs.with_constants(const_exprs); |
614 | 75 | let order_by_reqs = PhysicalSortRequirement::from_sort_exprs(orderby_keys); |
615 | 75 | let reverse_order_by_reqs = |
616 | 75 | PhysicalSortRequirement::from_sort_exprs(&reverse_order_bys(orderby_keys)); |
617 | 89 | for (should_swap, order_by_reqs) in |
618 | 75 | [(false, order_by_reqs), (true, reverse_order_by_reqs)] |
619 | | { |
620 | 89 | let req = LexRequirement::new( |
621 | 89 | [partition_by_reqs.inner.clone(), order_by_reqs.inner].concat(), |
622 | 89 | ); |
623 | 89 | let req = collapse_lex_req(req); |
624 | 89 | if partition_by_eqs.ordering_satisfy_requirement(&req) { |
625 | | // Window can be run with existing ordering |
626 | 63 | let mode = if indices.len() == partitionby_exprs.len() { |
627 | 30 | InputOrderMode::Sorted |
628 | 33 | } else if indices.is_empty() { |
629 | 20 | InputOrderMode::Linear |
630 | | } else { |
631 | 13 | InputOrderMode::PartiallySorted(indices) |
632 | | }; |
633 | 63 | return Some((should_swap, mode)); |
634 | 26 | } |
635 | | } |
636 | 12 | None |
637 | 75 | } |
638 | | |
639 | | #[cfg(test)] |
640 | | mod tests { |
641 | | use super::*; |
642 | | use crate::collect; |
643 | | use crate::expressions::col; |
644 | | use crate::streaming::StreamingTableExec; |
645 | | use crate::test::assert_is_pending; |
646 | | use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; |
647 | | |
648 | | use arrow::compute::SortOptions; |
649 | | use datafusion_execution::TaskContext; |
650 | | |
651 | | use datafusion_functions_aggregate::count::count_udaf; |
652 | | use futures::FutureExt; |
653 | | use InputOrderMode::{Linear, PartiallySorted, Sorted}; |
654 | | |
655 | 2 | fn create_test_schema() -> Result<SchemaRef> { |
656 | 2 | let nullable_column = Field::new("nullable_col", DataType::Int32, true); |
657 | 2 | let non_nullable_column = Field::new("non_nullable_col", DataType::Int32, false); |
658 | 2 | let schema = Arc::new(Schema::new(vec![nullable_column, non_nullable_column])); |
659 | 2 | |
660 | 2 | Ok(schema) |
661 | 2 | } |
662 | | |
663 | 1 | fn create_test_schema2() -> Result<SchemaRef> { |
664 | 1 | let a = Field::new("a", DataType::Int32, true); |
665 | 1 | let b = Field::new("b", DataType::Int32, true); |
666 | 1 | let c = Field::new("c", DataType::Int32, true); |
667 | 1 | let d = Field::new("d", DataType::Int32, true); |
668 | 1 | let e = Field::new("e", DataType::Int32, true); |
669 | 1 | let schema = Arc::new(Schema::new(vec![a, b, c, d, e])); |
670 | 1 | Ok(schema) |
671 | 1 | } |
672 | | |
673 | | // Generate a schema which consists of 5 columns (a, b, c, d, e) |
674 | 2 | fn create_test_schema3() -> Result<SchemaRef> { |
675 | 2 | let a = Field::new("a", DataType::Int32, true); |
676 | 2 | let b = Field::new("b", DataType::Int32, false); |
677 | 2 | let c = Field::new("c", DataType::Int32, true); |
678 | 2 | let d = Field::new("d", DataType::Int32, false); |
679 | 2 | let e = Field::new("e", DataType::Int32, false); |
680 | 2 | let schema = Arc::new(Schema::new(vec![a, b, c, d, e])); |
681 | 2 | Ok(schema) |
682 | 2 | } |
683 | | |
684 | | /// make PhysicalSortExpr with default options |
685 | 8 | pub fn sort_expr(name: &str, schema: &Schema) -> PhysicalSortExpr { |
686 | 8 | sort_expr_options(name, schema, SortOptions::default()) |
687 | 8 | } |
688 | | |
689 | | /// PhysicalSortExpr with specified options |
690 | 8 | pub fn sort_expr_options( |
691 | 8 | name: &str, |
692 | 8 | schema: &Schema, |
693 | 8 | options: SortOptions, |
694 | 8 | ) -> PhysicalSortExpr { |
695 | 8 | PhysicalSortExpr { |
696 | 8 | expr: col(name, schema).unwrap(), |
697 | 8 | options, |
698 | 8 | } |
699 | 8 | } |
700 | | |
701 | | /// Created a sorted Streaming Table exec |
702 | 2 | pub fn streaming_table_exec( |
703 | 2 | schema: &SchemaRef, |
704 | 2 | sort_exprs: impl IntoIterator<Item = PhysicalSortExpr>, |
705 | 2 | infinite_source: bool, |
706 | 2 | ) -> Result<Arc<dyn ExecutionPlan>> { |
707 | 2 | let sort_exprs = sort_exprs.into_iter().collect(); |
708 | 2 | |
709 | 2 | Ok(Arc::new(StreamingTableExec::try_new( |
710 | 2 | Arc::clone(schema), |
711 | 2 | vec![], |
712 | 2 | None, |
713 | 2 | Some(sort_exprs), |
714 | 2 | infinite_source, |
715 | 2 | None, |
716 | 2 | )?0 )) |
717 | 2 | } |
718 | | |
719 | | #[tokio::test] |
720 | 1 | async fn test_calc_requirements() -> Result<()> { |
721 | 1 | let schema = create_test_schema2()?0 ; |
722 | 1 | let test_data = vec![ |
723 | 1 | // PARTITION BY a, ORDER BY b ASC NULLS FIRST |
724 | 1 | ( |
725 | 1 | vec!["a"], |
726 | 1 | vec![("b", true, true)], |
727 | 1 | vec![("a", None), ("b", Some((true, true)))], |
728 | 1 | ), |
729 | 1 | // PARTITION BY a, ORDER BY a ASC NULLS FIRST |
730 | 1 | (vec!["a"], vec![("a", true, true)], vec![("a", None)]), |
731 | 1 | // PARTITION BY a, ORDER BY b ASC NULLS FIRST, c DESC NULLS LAST |
732 | 1 | ( |
733 | 1 | vec!["a"], |
734 | 1 | vec![("b", true, true), ("c", false, false)], |
735 | 1 | vec![ |
736 | 1 | ("a", None), |
737 | 1 | ("b", Some((true, true))), |
738 | 1 | ("c", Some((false, false))), |
739 | 1 | ], |
740 | 1 | ), |
741 | 1 | // PARTITION BY a, c, ORDER BY b ASC NULLS FIRST, c DESC NULLS LAST |
742 | 1 | ( |
743 | 1 | vec!["a", "c"], |
744 | 1 | vec![("b", true, true), ("c", false, false)], |
745 | 1 | vec![("a", None), ("c", None), ("b", Some((true, true)))], |
746 | 1 | ), |
747 | 1 | ]; |
748 | 5 | for (pb_params, ob_params, expected_params4 ) in test_data { |
749 | 4 | let mut partitionbys = vec![]; |
750 | 9 | for col_name5 in pb_params { |
751 | 5 | partitionbys.push(col(col_name, &schema)?0 ); |
752 | 1 | } |
753 | 1 | |
754 | 4 | let mut orderbys = vec![]; |
755 | 10 | for (col_name, descending, nulls_first6 ) in ob_params { |
756 | 6 | let expr = col(col_name, &schema)?0 ; |
757 | 6 | let options = SortOptions { |
758 | 6 | descending, |
759 | 6 | nulls_first, |
760 | 6 | }; |
761 | 6 | orderbys.push(PhysicalSortExpr { expr, options }); |
762 | 1 | } |
763 | 1 | |
764 | 4 | let mut expected: Option<LexRequirement> = None; |
765 | 13 | for (col_name, reqs9 ) in expected_params { |
766 | 9 | let options = reqs.map(|(descending, nulls_first)| SortOptions { |
767 | 4 | descending, |
768 | 4 | nulls_first, |
769 | 9 | }); |
770 | 9 | let expr = col(col_name, &schema)?0 ; |
771 | 9 | let res = PhysicalSortRequirement::new(expr, options); |
772 | 9 | if let Some(expected5 ) = &mut expected { |
773 | 5 | expected.push(res); |
774 | 5 | } else { |
775 | 4 | expected = Some(LexRequirement::new(vec![res])); |
776 | 4 | } |
777 | 1 | } |
778 | 4 | assert_eq!(calc_requirements(partitionbys, orderbys), expected); |
779 | 1 | } |
780 | 1 | Ok(()) |
781 | 1 | } |
782 | | |
783 | | #[tokio::test] |
784 | 1 | async fn test_drop_cancel() -> Result<()> { |
785 | 1 | let task_ctx = Arc::new(TaskContext::default()); |
786 | 1 | let schema = |
787 | 1 | Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, true)])); |
788 | 1 | |
789 | 1 | let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); |
790 | 1 | let refs = blocking_exec.refs(); |
791 | 1 | let window_agg_exec = Arc::new(WindowAggExec::try_new( |
792 | 1 | vec![create_window_expr( |
793 | 1 | &WindowFunctionDefinition::AggregateUDF(count_udaf()), |
794 | 1 | "count".to_owned(), |
795 | 1 | &[col("a", &schema)?0 ], |
796 | 1 | &[], |
797 | 1 | &[], |
798 | 1 | Arc::new(WindowFrame::new(None)), |
799 | 1 | schema.as_ref(), |
800 | 1 | false, |
801 | 1 | )?0 ], |
802 | 1 | blocking_exec, |
803 | 1 | vec![], |
804 | 1 | )?0 ); |
805 | 1 | |
806 | 1 | let fut = collect(window_agg_exec, task_ctx); |
807 | 1 | let mut fut = fut.boxed(); |
808 | 1 | |
809 | 1 | assert_is_pending(&mut fut); |
810 | 1 | drop(fut); |
811 | 1 | assert_strong_count_converges_to_zero(refs).await0 ; |
812 | 1 | |
813 | 1 | Ok(()) |
814 | 1 | } |
815 | | |
816 | | #[tokio::test] |
817 | 1 | async fn test_satisfy_nullable() -> Result<()> { |
818 | 1 | let schema = create_test_schema()?0 ; |
819 | 1 | let params = vec![ |
820 | 1 | ((true, true), (false, false), false), |
821 | 1 | ((true, true), (false, true), false), |
822 | 1 | ((true, true), (true, false), false), |
823 | 1 | ((true, false), (false, true), false), |
824 | 1 | ((true, false), (false, false), false), |
825 | 1 | ((true, false), (true, true), false), |
826 | 1 | ((true, false), (true, false), true), |
827 | 1 | ]; |
828 | 1 | for ( |
829 | 7 | (physical_desc, physical_nulls_first), |
830 | 7 | (req_desc, req_nulls_first), |
831 | 7 | expected, |
832 | 8 | ) in params |
833 | 1 | { |
834 | 7 | let physical_ordering = PhysicalSortExpr { |
835 | 7 | expr: col("nullable_col", &schema)?0 , |
836 | 7 | options: SortOptions { |
837 | 7 | descending: physical_desc, |
838 | 7 | nulls_first: physical_nulls_first, |
839 | 7 | }, |
840 | 1 | }; |
841 | 7 | let required_ordering = PhysicalSortExpr { |
842 | 7 | expr: col("nullable_col", &schema)?0 , |
843 | 7 | options: SortOptions { |
844 | 7 | descending: req_desc, |
845 | 7 | nulls_first: req_nulls_first, |
846 | 7 | }, |
847 | 7 | }; |
848 | 7 | let res = physical_ordering.satisfy(&required_ordering.into(), &schema); |
849 | 7 | assert_eq!(res, expected); |
850 | 1 | } |
851 | 1 | |
852 | 1 | Ok(()) |
853 | 1 | } |
854 | | |
855 | | #[tokio::test] |
856 | 1 | async fn test_satisfy_non_nullable() -> Result<()> { |
857 | 1 | let schema = create_test_schema()?0 ; |
858 | 1 | |
859 | 1 | let params = vec![ |
860 | 1 | ((true, true), (false, false), false), |
861 | 1 | ((true, true), (false, true), false), |
862 | 1 | ((true, true), (true, false), true), |
863 | 1 | ((true, false), (false, true), false), |
864 | 1 | ((true, false), (false, false), false), |
865 | 1 | ((true, false), (true, true), true), |
866 | 1 | ((true, false), (true, false), true), |
867 | 1 | ]; |
868 | 1 | for ( |
869 | 7 | (physical_desc, physical_nulls_first), |
870 | 7 | (req_desc, req_nulls_first), |
871 | 7 | expected, |
872 | 8 | ) in params |
873 | 1 | { |
874 | 7 | let physical_ordering = PhysicalSortExpr { |
875 | 7 | expr: col("non_nullable_col", &schema)?0 , |
876 | 7 | options: SortOptions { |
877 | 7 | descending: physical_desc, |
878 | 7 | nulls_first: physical_nulls_first, |
879 | 7 | }, |
880 | 1 | }; |
881 | 7 | let required_ordering = PhysicalSortExpr { |
882 | 7 | expr: col("non_nullable_col", &schema)?0 , |
883 | 7 | options: SortOptions { |
884 | 7 | descending: req_desc, |
885 | 7 | nulls_first: req_nulls_first, |
886 | 7 | }, |
887 | 7 | }; |
888 | 7 | let res = physical_ordering.satisfy(&required_ordering.into(), &schema); |
889 | 7 | assert_eq!(res, expected); |
890 | 1 | } |
891 | 1 | |
892 | 1 | Ok(()) |
893 | 1 | } |
894 | | |
// Exhaustively checks which `InputOrderMode` `get_window_mode` selects for
// every PARTITION BY / ORDER BY combination over columns a..c of a source
// sorted on (a, b, c, d). Reversibility is deliberately ignored here (the
// bool in the result is discarded); see `test_get_window_mode` for it.
#[tokio::test]
async fn test_get_window_mode_exhaustive() -> Result<()> {
    let test_schema = create_test_schema3()?;
    // Columns a,c are nullable whereas b,d are not nullable.
    // Source is sorted by a ASC NULLS FIRST, b ASC NULLS FIRST, c ASC NULLS FIRST, d ASC NULLS FIRST
    // Column e is not ordered.
    let sort_exprs = vec![
        sort_expr("a", &test_schema),
        sort_expr("b", &test_schema),
        sort_expr("c", &test_schema),
        sort_expr("d", &test_schema),
    ];
    let exec_unbounded = streaming_table_exec(&test_schema, sort_exprs, true)?;

    // test cases consists of vector of tuples. Where each tuple represents a single test case.
    // First field in the tuple is Vec<str> where each element in the vector represents PARTITION BY columns
    // For instance `vec!["a", "b"]` corresponds to PARTITION BY a, b
    // Second field in the tuple is Vec<str> where each element in the vector represents ORDER BY columns
    // For instance, vec!["c"], corresponds to ORDER BY c ASC NULLS FIRST, (ordering is default ordering. We do not check
    // for reversibility in this test).
    // Third field in the tuple is Option<InputOrderMode>, which corresponds to expected algorithm mode.
    // None represents that existing ordering is not sufficient to run executor with any one of the algorithms
    // (We need to add SortExec to be able to run it).
    // Some(InputOrderMode) represents, we can run algorithm with existing ordering; and algorithm should work in
    // InputOrderMode.
    let test_cases = vec![
        (vec!["a"], vec!["a"], Some(Sorted)),
        (vec!["a"], vec!["b"], Some(Sorted)),
        (vec!["a"], vec!["c"], None),
        (vec!["a"], vec!["a", "b"], Some(Sorted)),
        (vec!["a"], vec!["b", "c"], Some(Sorted)),
        (vec!["a"], vec!["a", "c"], None),
        (vec!["a"], vec!["a", "b", "c"], Some(Sorted)),
        (vec!["b"], vec!["a"], Some(Linear)),
        (vec!["b"], vec!["b"], Some(Linear)),
        (vec!["b"], vec!["c"], None),
        (vec!["b"], vec!["a", "b"], Some(Linear)),
        (vec!["b"], vec!["b", "c"], None),
        (vec!["b"], vec!["a", "c"], Some(Linear)),
        (vec!["b"], vec!["a", "b", "c"], Some(Linear)),
        (vec!["c"], vec!["a"], Some(Linear)),
        (vec!["c"], vec!["b"], None),
        (vec!["c"], vec!["c"], Some(Linear)),
        (vec!["c"], vec!["a", "b"], Some(Linear)),
        (vec!["c"], vec!["b", "c"], None),
        (vec!["c"], vec!["a", "c"], Some(Linear)),
        (vec!["c"], vec!["a", "b", "c"], Some(Linear)),
        (vec!["b", "a"], vec!["a"], Some(Sorted)),
        (vec!["b", "a"], vec!["b"], Some(Sorted)),
        (vec!["b", "a"], vec!["c"], Some(Sorted)),
        (vec!["b", "a"], vec!["a", "b"], Some(Sorted)),
        (vec!["b", "a"], vec!["b", "c"], Some(Sorted)),
        (vec!["b", "a"], vec!["a", "c"], Some(Sorted)),
        (vec!["b", "a"], vec!["a", "b", "c"], Some(Sorted)),
        (vec!["c", "b"], vec!["a"], Some(Linear)),
        (vec!["c", "b"], vec!["b"], Some(Linear)),
        (vec!["c", "b"], vec!["c"], Some(Linear)),
        (vec!["c", "b"], vec!["a", "b"], Some(Linear)),
        (vec!["c", "b"], vec!["b", "c"], Some(Linear)),
        (vec!["c", "b"], vec!["a", "c"], Some(Linear)),
        (vec!["c", "b"], vec!["a", "b", "c"], Some(Linear)),
        (vec!["c", "a"], vec!["a"], Some(PartiallySorted(vec![1]))),
        (vec!["c", "a"], vec!["b"], Some(PartiallySorted(vec![1]))),
        (vec!["c", "a"], vec!["c"], Some(PartiallySorted(vec![1]))),
        (
            vec!["c", "a"],
            vec!["a", "b"],
            Some(PartiallySorted(vec![1])),
        ),
        (
            vec!["c", "a"],
            vec!["b", "c"],
            Some(PartiallySorted(vec![1])),
        ),
        (
            vec!["c", "a"],
            vec!["a", "c"],
            Some(PartiallySorted(vec![1])),
        ),
        (
            vec!["c", "a"],
            vec!["a", "b", "c"],
            Some(PartiallySorted(vec![1])),
        ),
        (vec!["c", "b", "a"], vec!["a"], Some(Sorted)),
        (vec!["c", "b", "a"], vec!["b"], Some(Sorted)),
        (vec!["c", "b", "a"], vec!["c"], Some(Sorted)),
        (vec!["c", "b", "a"], vec!["a", "b"], Some(Sorted)),
        (vec!["c", "b", "a"], vec!["b", "c"], Some(Sorted)),
        (vec!["c", "b", "a"], vec!["a", "c"], Some(Sorted)),
        (vec!["c", "b", "a"], vec!["a", "b", "c"], Some(Sorted)),
    ];
    for (case_idx, test_case) in test_cases.iter().enumerate() {
        let (partition_by_columns, order_by_params, expected) = &test_case;
        let mut partition_by_exprs = vec![];
        for col_name in partition_by_columns {
            partition_by_exprs.push(col(col_name, &test_schema)?);
        }

        let mut order_by_exprs = vec![];
        for col_name in order_by_params {
            let expr = col(col_name, &test_schema)?;
            // Give default ordering, this is same with input ordering direction
            // In this test we do NOT check for reversibility (the flag is
            // discarded below).
            let options = SortOptions::default();
            order_by_exprs.push(PhysicalSortExpr { expr, options });
        }
        let res =
            get_window_mode(&partition_by_exprs, &order_by_exprs, &exec_unbounded);
        // Since reversibility is not important in this test. Convert Option<(bool, InputOrderMode)> to Option<InputOrderMode>
        let res = res.map(|(_, mode)| mode);
        assert_eq!(
            res, *expected,
            "Unexpected result for in unbounded test case#: {case_idx:?}, case: {test_case:?}"
        );
    }

    Ok(())
}
1014 | | |
// Checks both the selected `InputOrderMode` AND the reversibility flag
// returned by `get_window_mode` for ORDER BY clauses with explicit
// direction / null-placement, against a source sorted on (a, b, c, d).
#[tokio::test]
async fn test_get_window_mode() -> Result<()> {
    let test_schema = create_test_schema3()?;
    // Columns a,c are nullable whereas b,d are not nullable.
    // Source is sorted by a ASC NULLS FIRST, b ASC NULLS FIRST, c ASC NULLS FIRST, d ASC NULLS FIRST
    // Column e is not ordered.
    let sort_exprs = vec![
        sort_expr("a", &test_schema),
        sort_expr("b", &test_schema),
        sort_expr("c", &test_schema),
        sort_expr("d", &test_schema),
    ];
    let exec_unbounded = streaming_table_exec(&test_schema, sort_exprs, true)?;

    // test cases consists of vector of tuples. Where each tuple represents a single test case.
    // First field in the tuple is Vec<str> where each element in the vector represents PARTITION BY columns
    // For instance `vec!["a", "b"]` corresponds to PARTITION BY a, b
    // Second field in the tuple is Vec<(str, bool, bool)> where each element in the vector represents ORDER BY columns
    // For instance, vec![("c", false, false)], corresponds to ORDER BY c ASC NULLS LAST,
    // similarly, vec![("c", true, true)], corresponds to ORDER BY c DESC NULLS FIRST,
    // Third field in the tuple is Option<(bool, InputOrderMode)>, which corresponds to expected result.
    // None represents that existing ordering is not sufficient to run executor with any one of the algorithms
    // (We need to add SortExec to be able to run it).
    // Some((bool, InputOrderMode)) represents, we can run algorithm with existing ordering. Algorithm should work in
    // InputOrderMode, bool field represents whether we should reverse window expressions to run executor with existing ordering.
    // For instance, `Some((false, InputOrderMode::Sorted))`, represents that we shouldn't reverse window expressions. And algorithm
    // should work in Sorted mode to work with existing ordering.
    let test_cases = vec![
        // PARTITION BY a, b ORDER BY c ASC NULLS LAST
        (vec!["a", "b"], vec![("c", false, false)], None),
        // ORDER BY c ASC NULLS FIRST
        (vec![], vec![("c", false, true)], None),
        // PARTITION BY b, ORDER BY c ASC NULLS FIRST
        (vec!["b"], vec![("c", false, true)], None),
        // PARTITION BY a, ORDER BY c ASC NULLS FIRST
        (vec!["a"], vec![("c", false, true)], None),
        // PARTITION BY a, b ORDER BY c ASC NULLS FIRST, e ASC NULLS FIRST
        (
            vec!["a", "b"],
            vec![("c", false, true), ("e", false, true)],
            None,
        ),
        // PARTITION BY a, ORDER BY b ASC NULLS FIRST
        (vec!["a"], vec![("b", false, true)], Some((false, Sorted))),
        // PARTITION BY a, ORDER BY a ASC NULLS FIRST
        (vec!["a"], vec![("a", false, true)], Some((false, Sorted))),
        // PARTITION BY a, ORDER BY a ASC NULLS LAST
        (vec!["a"], vec![("a", false, false)], Some((false, Sorted))),
        // PARTITION BY a, ORDER BY a DESC NULLS FIRST
        (vec!["a"], vec![("a", true, true)], Some((false, Sorted))),
        // PARTITION BY a, ORDER BY a DESC NULLS LAST
        (vec!["a"], vec![("a", true, false)], Some((false, Sorted))),
        // PARTITION BY a, ORDER BY b ASC NULLS LAST
        (vec!["a"], vec![("b", false, false)], Some((false, Sorted))),
        // PARTITION BY a, ORDER BY b DESC NULLS LAST
        (vec!["a"], vec![("b", true, false)], Some((true, Sorted))),
        // PARTITION BY a, b ORDER BY c ASC NULLS FIRST
        (
            vec!["a", "b"],
            vec![("c", false, true)],
            Some((false, Sorted)),
        ),
        // PARTITION BY b, a ORDER BY c ASC NULLS FIRST
        (
            vec!["b", "a"],
            vec![("c", false, true)],
            Some((false, Sorted)),
        ),
        // PARTITION BY a, b ORDER BY c DESC NULLS LAST
        (
            vec!["a", "b"],
            vec![("c", true, false)],
            Some((true, Sorted)),
        ),
        // PARTITION BY e ORDER BY a ASC NULLS FIRST
        (
            vec!["e"],
            vec![("a", false, true)],
            // For unbounded, expects to work in Linear mode. Shouldn't reverse window function.
            Some((false, Linear)),
        ),
        // PARTITION BY b, c ORDER BY a ASC NULLS FIRST, c ASC NULLS FIRST
        (
            vec!["b", "c"],
            vec![("a", false, true), ("c", false, true)],
            Some((false, Linear)),
        ),
        // PARTITION BY b ORDER BY a ASC NULLS FIRST
        (vec!["b"], vec![("a", false, true)], Some((false, Linear))),
        // PARTITION BY a, e ORDER BY b ASC NULLS FIRST
        (
            vec!["a", "e"],
            vec![("b", false, true)],
            Some((false, PartiallySorted(vec![0]))),
        ),
        // PARTITION BY a, c ORDER BY b ASC NULLS FIRST
        (
            vec!["a", "c"],
            vec![("b", false, true)],
            Some((false, PartiallySorted(vec![0]))),
        ),
        // PARTITION BY c, a ORDER BY b ASC NULLS FIRST
        (
            vec!["c", "a"],
            vec![("b", false, true)],
            Some((false, PartiallySorted(vec![1]))),
        ),
        // PARTITION BY d, b, a ORDER BY c ASC NULLS FIRST
        (
            vec!["d", "b", "a"],
            vec![("c", false, true)],
            Some((false, PartiallySorted(vec![2, 1]))),
        ),
        // PARTITION BY e, b, a ORDER BY c ASC NULLS FIRST
        (
            vec!["e", "b", "a"],
            vec![("c", false, true)],
            Some((false, PartiallySorted(vec![2, 1]))),
        ),
        // PARTITION BY d, a ORDER BY b ASC NULLS FIRST
        (
            vec!["d", "a"],
            vec![("b", false, true)],
            Some((false, PartiallySorted(vec![1]))),
        ),
        // PARTITION BY a, ORDER BY b ASC NULLS FIRST, a ASC NULLS FIRST
        (
            vec!["a"],
            vec![("b", false, true), ("a", false, true)],
            Some((false, Sorted)),
        ),
        // ORDER BY b, a ASC NULLS FIRST
        (vec![], vec![("b", false, true), ("a", false, true)], None),
    ];
    for (case_idx, test_case) in test_cases.iter().enumerate() {
        let (partition_by_columns, order_by_params, expected) = &test_case;
        let mut partition_by_exprs = vec![];
        for col_name in partition_by_columns {
            partition_by_exprs.push(col(col_name, &test_schema)?);
        }

        let mut order_by_exprs = vec![];
        for (col_name, descending, nulls_first) in order_by_params {
            let expr = col(col_name, &test_schema)?;
            let options = SortOptions {
                descending: *descending,
                nulls_first: *nulls_first,
            };
            order_by_exprs.push(PhysicalSortExpr { expr, options });
        }

        assert_eq!(
            get_window_mode(&partition_by_exprs, &order_by_exprs, &exec_unbounded),
            *expected,
            "Unexpected result for in unbounded test case#: {case_idx:?}, case: {test_case:?}"
        );
    }

    Ok(())
}
1175 | | } |