/Users/andrewlamb/Software/datafusion/datafusion/physical-expr/src/equivalence/properties.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use std::fmt::Display; |
19 | | use std::hash::{Hash, Hasher}; |
20 | | use std::sync::Arc; |
21 | | |
22 | | use super::ordering::collapse_lex_ordering; |
23 | | use crate::equivalence::class::const_exprs_contains; |
24 | | use crate::equivalence::{ |
25 | | collapse_lex_req, EquivalenceClass, EquivalenceGroup, OrderingEquivalenceClass, |
26 | | ProjectionMapping, |
27 | | }; |
28 | | use crate::expressions::{with_new_schema, CastExpr, Column, Literal}; |
29 | | use crate::{ |
30 | | physical_exprs_contains, ConstExpr, LexOrdering, LexOrderingRef, LexRequirement, |
31 | | LexRequirementRef, PhysicalExpr, PhysicalExprRef, PhysicalSortExpr, |
32 | | PhysicalSortRequirement, |
33 | | }; |
34 | | |
35 | | use arrow_schema::{SchemaRef, SortOptions}; |
36 | | use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; |
37 | | use datafusion_common::{internal_err, plan_err, JoinSide, JoinType, Result}; |
38 | | use datafusion_expr::interval_arithmetic::Interval; |
39 | | use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; |
40 | | use datafusion_physical_expr_common::utils::ExprPropertiesNode; |
41 | | |
42 | | use indexmap::{IndexMap, IndexSet}; |
43 | | use itertools::Itertools; |
44 | | |
45 | | /// A `EquivalenceProperties` object stores information known about the output |
46 | | /// of a plan node, that can be used to optimize the plan. |
47 | | /// |
48 | | /// Currently, it keeps track of: |
49 | | /// - Sort expressions (orderings) |
50 | | /// - Equivalent expressions: expressions that are known to have same value. |
51 | | /// - Constants expressions: expressions that are known to contain a single |
52 | | /// constant value. |
53 | | /// |
54 | | /// # Example equivalent sort expressions |
55 | | /// |
56 | | /// Consider table below: |
57 | | /// |
58 | | /// ```text |
59 | | /// ┌-------┐ |
60 | | /// | a | b | |
61 | | /// |---|---| |
62 | | /// | 1 | 9 | |
63 | | /// | 2 | 8 | |
64 | | /// | 3 | 7 | |
65 | | /// | 5 | 5 | |
66 | | /// └---┴---┘ |
67 | | /// ``` |
68 | | /// |
69 | | /// In this case, both `a ASC` and `b DESC` can describe the table ordering. |
/// `EquivalenceProperties` tracks these different valid sort expressions and
/// treats `a ASC` and `b DESC` on an equal footing. For example, if the query
/// specifies the output sorted by EITHER `a ASC` or `b DESC`, the sort can be
73 | | /// avoided. |
74 | | /// |
75 | | /// # Example equivalent expressions |
76 | | /// |
77 | | /// Similarly, consider the table below: |
78 | | /// |
79 | | /// ```text |
80 | | /// ┌-------┐ |
81 | | /// | a | b | |
82 | | /// |---|---| |
83 | | /// | 1 | 1 | |
84 | | /// | 2 | 2 | |
85 | | /// | 3 | 3 | |
86 | | /// | 5 | 5 | |
87 | | /// └---┴---┘ |
88 | | /// ``` |
89 | | /// |
/// In this case, columns `a` and `b` always have the same value, and this
/// object can keep track of such equivalences. With this information,
/// DataFusion can optimize certain operations. For example, if the partition requirement is
93 | | /// `Hash(a)` and output partitioning is `Hash(b)`, then DataFusion avoids |
94 | | /// repartitioning the data as the existing partitioning satisfies the |
95 | | /// requirement. |
96 | | /// |
97 | | /// # Code Example |
98 | | /// ``` |
99 | | /// # use std::sync::Arc; |
100 | | /// # use arrow_schema::{Schema, Field, DataType, SchemaRef}; |
101 | | /// # use datafusion_physical_expr::{ConstExpr, EquivalenceProperties}; |
102 | | /// # use datafusion_physical_expr::expressions::col; |
103 | | /// use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; |
104 | | /// # let schema: SchemaRef = Arc::new(Schema::new(vec![ |
105 | | /// # Field::new("a", DataType::Int32, false), |
106 | | /// # Field::new("b", DataType::Int32, false), |
107 | | /// # Field::new("c", DataType::Int32, false), |
108 | | /// # ])); |
109 | | /// # let col_a = col("a", &schema).unwrap(); |
110 | | /// # let col_b = col("b", &schema).unwrap(); |
111 | | /// # let col_c = col("c", &schema).unwrap(); |
112 | | /// // This object represents data that is sorted by a ASC, c DESC |
113 | | /// // with a single constant value of b |
114 | | /// let mut eq_properties = EquivalenceProperties::new(schema) |
115 | | /// .with_constants(vec![ConstExpr::from(col_b)]); |
116 | | /// eq_properties.add_new_ordering(vec![ |
117 | | /// PhysicalSortExpr::new_default(col_a).asc(), |
118 | | /// PhysicalSortExpr::new_default(col_c).desc(), |
119 | | /// ]); |
120 | | /// |
121 | | /// assert_eq!(eq_properties.to_string(), "order: [[a@0 ASC,c@2 DESC]], const: [b@1]") |
122 | | /// ``` |
#[derive(Debug, Clone)]
pub struct EquivalenceProperties {
    /// Collection of equivalence classes that store expressions with the same
    /// value.
    pub eq_group: EquivalenceGroup,
    /// Equivalent sort expressions for this table.
    pub oeq_class: OrderingEquivalenceClass,
    /// Expressions whose values are constant throughout the table.
    /// Entries are stored normalized with respect to `eq_group` (see
    /// [`Self::with_constants`]).
    /// TODO: We do not need to track constants separately, they can be tracked
    /// inside `eq_groups` as `Literal` expressions.
    pub constants: Vec<ConstExpr>,
    /// Schema associated with this object.
    schema: SchemaRef,
}
137 | | |
138 | | impl EquivalenceProperties { |
139 | | /// Creates an empty `EquivalenceProperties` object. |
140 | 1.21k | pub fn new(schema: SchemaRef) -> Self { |
141 | 1.21k | Self { |
142 | 1.21k | eq_group: EquivalenceGroup::empty(), |
143 | 1.21k | oeq_class: OrderingEquivalenceClass::empty(), |
144 | 1.21k | constants: vec![], |
145 | 1.21k | schema, |
146 | 1.21k | } |
147 | 1.21k | } |
148 | | |
149 | | /// Creates a new `EquivalenceProperties` object with the given orderings. |
150 | 1.89k | pub fn new_with_orderings(schema: SchemaRef, orderings: &[LexOrdering]) -> Self { |
151 | 1.89k | Self { |
152 | 1.89k | eq_group: EquivalenceGroup::empty(), |
153 | 1.89k | oeq_class: OrderingEquivalenceClass::new(orderings.to_vec()), |
154 | 1.89k | constants: vec![], |
155 | 1.89k | schema, |
156 | 1.89k | } |
157 | 1.89k | } |
158 | | |
    /// Returns the associated schema (the schema of the plan node output
    /// these properties describe).
    pub fn schema(&self) -> &SchemaRef {
        &self.schema
    }
163 | | |
    /// Returns a reference to the ordering equivalence class within, which
    /// stores all equivalent sort orderings known for the table.
    pub fn oeq_class(&self) -> &OrderingEquivalenceClass {
        &self.oeq_class
    }
168 | | |
    /// Returns a reference to the equivalence group within, which stores
    /// classes of expressions known to have the same value.
    pub fn eq_group(&self) -> &EquivalenceGroup {
        &self.eq_group
    }
173 | | |
    /// Returns a reference to the constant expressions (expressions known to
    /// have a single value throughout the table).
    pub fn constants(&self) -> &[ConstExpr] {
        &self.constants
    }
178 | | |
179 | | /// Returns the output ordering of the properties. |
180 | 4.73k | pub fn output_ordering(&self) -> Option<LexOrdering> { |
181 | 4.73k | let constants = self.constants(); |
182 | 4.73k | let mut output_ordering = self.oeq_class().output_ordering().unwrap_or_default(); |
183 | 4.73k | // Prune out constant expressions |
184 | 4.73k | output_ordering |
185 | 4.73k | .retain(|sort_expr| !const_exprs_contains(constants, &sort_expr.expr)2.18k ); |
186 | 4.73k | (!output_ordering.is_empty()).then_some(output_ordering) |
187 | 4.73k | } |
188 | | |
189 | | /// Returns the normalized version of the ordering equivalence class within. |
190 | | /// Normalization removes constants and duplicates as well as standardizing |
191 | | /// expressions according to the equivalence group within. |
192 | 1.63k | pub fn normalized_oeq_class(&self) -> OrderingEquivalenceClass { |
193 | 1.63k | OrderingEquivalenceClass::new( |
194 | 1.63k | self.oeq_class |
195 | 1.63k | .iter() |
196 | 1.63k | .map(|ordering| self.normalize_sort_exprs(ordering)947 ) |
197 | 1.63k | .collect(), |
198 | 1.63k | ) |
199 | 1.63k | } |
200 | | |
    /// Extends this `EquivalenceProperties` with the `other` object.
    ///
    /// Merges `other`'s equivalence group and ordering equivalence class into
    /// `self`; `other`'s constants go through [`Self::with_constants`], so
    /// they are normalized and deduplicated on the way in.
    pub fn extend(mut self, other: Self) -> Self {
        self.eq_group.extend(other.eq_group);
        self.oeq_class.extend(other.oeq_class);
        self.with_constants(other.constants)
    }
207 | | |
    /// Clears (empties) the ordering equivalence class within this object.
    /// Call this method when existing orderings are invalidated (e.g. by an
    /// operation that destroys sort order).
    pub fn clear_orderings(&mut self) {
        self.oeq_class.clear();
    }
213 | | |
    /// Removes constant expressions that may change across partitions.
    /// This method should be used when data from different partitions are merged.
    pub fn clear_per_partition_constants(&mut self) {
        // Keep only constants guaranteed to hold the same value in every partition:
        self.constants.retain(|item| item.across_partitions());
    }
219 | | |
    /// Extends this `EquivalenceProperties` by adding the orderings inside the
    /// ordering equivalence class `other`.
    pub fn add_ordering_equivalence_class(&mut self, other: OrderingEquivalenceClass) {
        self.oeq_class.extend(other);
    }
225 | | |
    /// Adds new orderings into the existing ordering equivalence class.
    pub fn add_new_orderings(
        &mut self,
        orderings: impl IntoIterator<Item = LexOrdering>,
    ) {
        self.oeq_class.add_new_orderings(orderings);
    }
233 | | |
    /// Adds a single ordering to the existing ordering equivalence class.
    /// Convenience wrapper around [`Self::add_new_orderings`].
    pub fn add_new_ordering(&mut self, ordering: LexOrdering) {
        self.add_new_orderings([ordering]);
    }
238 | | |
    /// Incorporates the given equivalence group to into the existing
    /// equivalence group within.
    pub fn add_equivalence_group(&mut self, other_eq_group: EquivalenceGroup) {
        self.eq_group.extend(other_eq_group);
    }
244 | | |
245 | | /// Adds a new equality condition into the existing equivalence group. |
246 | | /// If the given equality defines a new equivalence class, adds this new |
247 | | /// equivalence class to the equivalence group. |
248 | 5 | pub fn add_equal_conditions( |
249 | 5 | &mut self, |
250 | 5 | left: &Arc<dyn PhysicalExpr>, |
251 | 5 | right: &Arc<dyn PhysicalExpr>, |
252 | 5 | ) -> Result<()> { |
253 | 5 | // Discover new constants in light of new the equality: |
254 | 5 | if self.is_expr_constant(left) { |
255 | | // Left expression is constant, add right as constant |
256 | 0 | if !const_exprs_contains(&self.constants, right) { |
257 | 0 | self.constants |
258 | 0 | .push(ConstExpr::from(right).with_across_partitions(true)); |
259 | 0 | } |
260 | 5 | } else if self.is_expr_constant(right) { |
261 | | // Right expression is constant, add left as constant |
262 | 4 | if !const_exprs_contains(&self.constants, left) { |
263 | 4 | self.constants |
264 | 4 | .push(ConstExpr::from(left).with_across_partitions(true)); |
265 | 4 | }0 |
266 | 1 | } |
267 | | |
268 | | // Add equal expressions to the state |
269 | 5 | self.eq_group.add_equal_conditions(left, right); |
270 | 5 | |
271 | 5 | // Discover any new orderings |
272 | 5 | self.discover_new_orderings(left)?0 ; |
273 | 5 | Ok(()) |
274 | 5 | } |
275 | | |
    /// Track/register physical expressions with constant values.
    /// Deprecated alias of [`Self::with_constants`].
    #[deprecated(since = "43.0.0", note = "Use [`with_constants`] instead")]
    pub fn add_constants(self, constants: impl IntoIterator<Item = ConstExpr>) -> Self {
        self.with_constants(constants)
    }
281 | | |
282 | | /// Track/register physical expressions with constant values. |
283 | 789 | pub fn with_constants( |
284 | 789 | mut self, |
285 | 789 | constants: impl IntoIterator<Item = ConstExpr>, |
286 | 789 | ) -> Self { |
287 | 789 | let (const_exprs, across_partition_flags): ( |
288 | 789 | Vec<Arc<dyn PhysicalExpr>>, |
289 | 789 | Vec<bool>, |
290 | 789 | ) = constants |
291 | 789 | .into_iter() |
292 | 789 | .map(|const_expr| { |
293 | 276 | let across_partitions = const_expr.across_partitions(); |
294 | 276 | let expr = const_expr.owned_expr(); |
295 | 276 | (expr, across_partitions) |
296 | 789 | }) |
297 | 789 | .unzip(); |
298 | 789 | for (expr, across_partitions276 ) in self |
299 | 789 | .eq_group |
300 | 789 | .normalize_exprs(const_exprs) |
301 | 789 | .into_iter() |
302 | 789 | .zip(across_partition_flags) |
303 | | { |
304 | 276 | if !const_exprs_contains(&self.constants, &expr) { |
305 | 268 | let const_expr = |
306 | 268 | ConstExpr::from(expr).with_across_partitions(across_partitions); |
307 | 268 | self.constants.push(const_expr); |
308 | 268 | }8 |
309 | | } |
310 | | |
311 | 789 | for ordering311 in self.normalized_oeq_class().iter() { |
312 | 311 | if let Err(e0 ) = self.discover_new_orderings(&ordering[0].expr) { |
313 | 0 | log::debug!("error discovering new orderings: {e}"); |
314 | 311 | } |
315 | | } |
316 | | |
317 | 789 | self |
318 | 789 | } |
319 | | |
    // Discover new valid orderings in light of a new equality.
    // Accepts a single argument (`expr`) which is used to determine
    // which orderings should be updated.
    // When constants or equivalence classes are changed, there may be new orderings
    // that can be discovered with the new equivalence properties.
    // For a discussion, see: https://github.com/apache/datafusion/issues/9812
    fn discover_new_orderings(&mut self, expr: &Arc<dyn PhysicalExpr>) -> Result<()> {
        let normalized_expr = self.eq_group().normalize_expr(Arc::clone(expr));
        // Collect every expression equivalent to `expr`; if it belongs to no
        // class, it is only equivalent to itself:
        let eq_class = self
            .eq_group
            .classes
            .iter()
            .find_map(|class| {
                class
                    .contains(&normalized_expr)
                    .then(|| class.clone().into_vec())
            })
            .unwrap_or_else(|| vec![Arc::clone(&normalized_expr)]);

        let mut new_orderings: Vec<LexOrdering> = vec![];
        // Consider only orderings led by `expr` that have at least two
        // entries (`next_expr` is the second sort expression):
        for (ordering, next_expr) in self
            .normalized_oeq_class()
            .iter()
            .filter(|ordering| ordering[0].expr.eq(&normalized_expr))
            // First expression after leading ordering
            .filter_map(|ordering| Some(ordering).zip(ordering.get(1)))
        {
            let leading_ordering = ordering[0].options;
            // Currently, we only handle expressions with a single child.
            // TODO: It should be possible to handle expressions orderings like
            //       f(a, b, c), a, b, c if f is monotonic in all arguments.
            for equivalent_expr in &eq_class {
                let children = equivalent_expr.children();
                // `equivalent_expr` must be a unary expression of `next_expr`
                // that preserves the leading sort direction:
                if children.len() == 1
                    && children[0].eq(&next_expr.expr)
                    && SortProperties::Ordered(leading_ordering)
                        == equivalent_expr
                            .get_properties(&[ExprProperties {
                                sort_properties: SortProperties::Ordered(
                                    leading_ordering,
                                ),
                                range: Interval::make_unbounded(
                                    &equivalent_expr.data_type(&self.schema)?,
                                )?,
                            }])?
                            .sort_properties
                {
                    // Assume existing ordering is [a ASC, b ASC].
                    // When equality a = f(b) is given, if we know that given ordering `[b ASC]`, ordering `[f(b) ASC]` is valid,
                    // then we can deduce that ordering `[b ASC]` is also valid.
                    // Hence, ordering `[b ASC]` can be added to the state as a valid ordering
                    // (e.g. the existing ordering with its leading entry removed).
                    new_orderings.push(ordering[1..].to_vec());
                    break;
                }
            }
        }

        self.oeq_class.add_new_orderings(new_orderings);
        Ok(())
    }
381 | | |
    /// Updates the ordering equivalence group within assuming that the table
    /// is re-sorted according to the argument `sort_exprs`. Note that constants
    /// and equivalence classes are unchanged as they are unaffected by a re-sort.
    pub fn with_reorder(mut self, sort_exprs: Vec<PhysicalSortExpr>) -> Self {
        // TODO: In some cases, existing ordering equivalences may still be valid; add this analysis.
        self.oeq_class = OrderingEquivalenceClass::new(vec![sort_exprs]);
        self
    }
390 | | |
391 | | /// Normalizes the given sort expressions (i.e. `sort_exprs`) using the |
392 | | /// equivalence group and the ordering equivalence class within. |
393 | | /// |
394 | | /// Assume that `self.eq_group` states column `a` and `b` are aliases. |
395 | | /// Also assume that `self.oeq_class` states orderings `d ASC` and `a ASC, c ASC` |
396 | | /// are equivalent (in the sense that both describe the ordering of the table). |
397 | | /// If the `sort_exprs` argument were `vec![b ASC, c ASC, a ASC]`, then this |
398 | | /// function would return `vec![a ASC, c ASC]`. Internally, it would first |
399 | | /// normalize to `vec![a ASC, c ASC, a ASC]` and end up with the final result |
400 | | /// after deduplication. |
401 | 947 | fn normalize_sort_exprs(&self, sort_exprs: LexOrderingRef) -> LexOrdering { |
402 | 947 | // Convert sort expressions to sort requirements: |
403 | 947 | let sort_reqs = PhysicalSortRequirement::from_sort_exprs(sort_exprs.iter()); |
404 | 947 | // Normalize the requirements: |
405 | 947 | let normalized_sort_reqs = self.normalize_sort_requirements(&sort_reqs); |
406 | 947 | // Convert sort requirements back to sort expressions: |
407 | 947 | PhysicalSortRequirement::to_sort_exprs(normalized_sort_reqs) |
408 | 947 | } |
409 | | |
410 | | /// Normalizes the given sort requirements (i.e. `sort_reqs`) using the |
411 | | /// equivalence group and the ordering equivalence class within. It works by: |
412 | | /// - Removing expressions that have a constant value from the given requirement. |
413 | | /// - Replacing sections that belong to some equivalence class in the equivalence |
414 | | /// group with the first entry in the matching equivalence class. |
415 | | /// |
416 | | /// Assume that `self.eq_group` states column `a` and `b` are aliases. |
417 | | /// Also assume that `self.oeq_class` states orderings `d ASC` and `a ASC, c ASC` |
418 | | /// are equivalent (in the sense that both describe the ordering of the table). |
419 | | /// If the `sort_reqs` argument were `vec![b ASC, c ASC, a ASC]`, then this |
420 | | /// function would return `vec![a ASC, c ASC]`. Internally, it would first |
421 | | /// normalize to `vec![a ASC, c ASC, a ASC]` and end up with the final result |
422 | | /// after deduplication. |
423 | 1.25k | fn normalize_sort_requirements( |
424 | 1.25k | &self, |
425 | 1.25k | sort_reqs: LexRequirementRef, |
426 | 1.25k | ) -> LexRequirement { |
427 | 1.25k | let normalized_sort_reqs = self.eq_group.normalize_sort_requirements(sort_reqs); |
428 | 1.25k | let mut constant_exprs = vec![]; |
429 | 1.25k | constant_exprs.extend( |
430 | 1.25k | self.constants |
431 | 1.25k | .iter() |
432 | 1.25k | .map(|const_expr| Arc::clone(const_expr.expr())1.17k ), |
433 | 1.25k | ); |
434 | 1.25k | let constants_normalized = self.eq_group.normalize_exprs(constant_exprs); |
435 | 1.25k | // Prune redundant sections in the requirement: |
436 | 1.25k | collapse_lex_req( |
437 | 1.25k | normalized_sort_reqs |
438 | 1.25k | .iter() |
439 | 3.41k | .filter(|&order| { |
440 | 3.41k | !physical_exprs_contains(&constants_normalized, &order.expr) |
441 | 3.41k | }) |
442 | 1.25k | .cloned() |
443 | 1.25k | .collect(), |
444 | 1.25k | ) |
445 | 1.25k | } |
446 | | |
    /// Checks whether the given ordering is satisfied by any of the existing
    /// orderings. Delegates to [`Self::ordering_satisfy_requirement`] after
    /// converting the sort expressions to sort requirements.
    pub fn ordering_satisfy(&self, given: LexOrderingRef) -> bool {
        // Convert the given sort expressions to sort requirements:
        let sort_requirements = PhysicalSortRequirement::from_sort_exprs(given.iter());
        self.ordering_satisfy_requirement(&sort_requirements)
    }
454 | | |
455 | | /// Checks whether the given sort requirements are satisfied by any of the |
456 | | /// existing orderings. |
457 | 199 | pub fn ordering_satisfy_requirement(&self, reqs: LexRequirementRef) -> bool { |
458 | 199 | let mut eq_properties = self.clone(); |
459 | 199 | // First, standardize the given requirement: |
460 | 199 | let normalized_reqs = eq_properties.normalize_sort_requirements(reqs); |
461 | 268 | for normalized_req149 in normalized_reqs { |
462 | | // Check whether given ordering is satisfied |
463 | 149 | if !eq_properties.ordering_satisfy_single(&normalized_req) { |
464 | 80 | return false; |
465 | 69 | } |
466 | 69 | // Treat satisfied keys as constants in subsequent iterations. We |
467 | 69 | // can do this because the "next" key only matters in a lexicographical |
468 | 69 | // ordering when the keys to its left have the same values. |
469 | 69 | // |
470 | 69 | // Note that these expressions are not properly "constants". This is just |
471 | 69 | // an implementation strategy confined to this function. |
472 | 69 | // |
473 | 69 | // For example, assume that the requirement is `[a ASC, (b + c) ASC]`, |
474 | 69 | // and existing equivalent orderings are `[a ASC, b ASC]` and `[c ASC]`. |
475 | 69 | // From the analysis above, we know that `[a ASC]` is satisfied. Then, |
476 | 69 | // we add column `a` as constant to the algorithm state. This enables us |
477 | 69 | // to deduce that `(b + c) ASC` is satisfied, given `a` is constant. |
478 | 69 | eq_properties = eq_properties |
479 | 69 | .with_constants(std::iter::once(ConstExpr::from(normalized_req.expr))); |
480 | | } |
481 | 119 | true |
482 | 199 | } |
483 | | |
484 | | /// Determines whether the ordering specified by the given sort requirement |
485 | | /// is satisfied based on the orderings within, equivalence classes, and |
486 | | /// constant expressions. |
487 | | /// |
488 | | /// # Arguments |
489 | | /// |
490 | | /// - `req`: A reference to a `PhysicalSortRequirement` for which the ordering |
491 | | /// satisfaction check will be done. |
492 | | /// |
493 | | /// # Returns |
494 | | /// |
495 | | /// Returns `true` if the specified ordering is satisfied, `false` otherwise. |
496 | 149 | fn ordering_satisfy_single(&self, req: &PhysicalSortRequirement) -> bool { |
497 | 149 | let ExprProperties { |
498 | 149 | sort_properties, .. |
499 | 149 | } = self.get_expr_properties(Arc::clone(&req.expr)); |
500 | 149 | match sort_properties { |
501 | 74 | SortProperties::Ordered(options) => { |
502 | 74 | let sort_expr = PhysicalSortExpr { |
503 | 74 | expr: Arc::clone(&req.expr), |
504 | 74 | options, |
505 | 74 | }; |
506 | 74 | sort_expr.satisfy(req, self.schema()) |
507 | | } |
508 | | // Singleton expressions satisfies any ordering. |
509 | 0 | SortProperties::Singleton => true, |
510 | 75 | SortProperties::Unordered => false, |
511 | | } |
512 | 149 | } |
513 | | |
514 | | /// Checks whether the `given`` sort requirements are equal or more specific |
515 | | /// than the `reference` sort requirements. |
516 | 0 | pub fn requirements_compatible( |
517 | 0 | &self, |
518 | 0 | given: LexRequirementRef, |
519 | 0 | reference: LexRequirementRef, |
520 | 0 | ) -> bool { |
521 | 0 | let normalized_given = self.normalize_sort_requirements(given); |
522 | 0 | let normalized_reference = self.normalize_sort_requirements(reference); |
523 | 0 |
|
524 | 0 | (normalized_reference.len() <= normalized_given.len()) |
525 | 0 | && normalized_reference |
526 | 0 | .into_iter() |
527 | 0 | .zip(normalized_given) |
528 | 0 | .all(|(reference, given)| given.compatible(&reference)) |
529 | 0 | } |
530 | | |
531 | | /// Returns the finer ordering among the orderings `lhs` and `rhs`, breaking |
532 | | /// any ties by choosing `lhs`. |
533 | | /// |
534 | | /// The finer ordering is the ordering that satisfies both of the orderings. |
535 | | /// If the orderings are incomparable, returns `None`. |
536 | | /// |
537 | | /// For example, the finer ordering among `[a ASC]` and `[a ASC, b ASC]` is |
538 | | /// the latter. |
539 | 54 | pub fn get_finer_ordering( |
540 | 54 | &self, |
541 | 54 | lhs: LexOrderingRef, |
542 | 54 | rhs: LexOrderingRef, |
543 | 54 | ) -> Option<LexOrdering> { |
544 | 54 | // Convert the given sort expressions to sort requirements: |
545 | 54 | let lhs = PhysicalSortRequirement::from_sort_exprs(lhs); |
546 | 54 | let rhs = PhysicalSortRequirement::from_sort_exprs(rhs); |
547 | 54 | let finer = self.get_finer_requirement(&lhs, &rhs); |
548 | 54 | // Convert the chosen sort requirements back to sort expressions: |
549 | 54 | finer.map(PhysicalSortRequirement::to_sort_exprs) |
550 | 54 | } |
551 | | |
    /// Returns the finer ordering among the requirements `lhs` and `rhs`,
    /// breaking any ties by choosing `lhs`.
    ///
    /// The finer requirements are the ones that satisfy both of the given
    /// requirements. If the requirements are incomparable, returns `None`.
    ///
    /// For example, the finer requirements among `[a ASC]` and `[a ASC, b ASC]`
    /// is the latter.
    pub fn get_finer_requirement(
        &self,
        req1: LexRequirementRef,
        req2: LexRequirementRef,
    ) -> Option<LexRequirement> {
        let mut lhs = self.normalize_sort_requirements(req1);
        let mut rhs = self.normalize_sort_requirements(req2);
        // Walk the shorter prefix pairwise; entries must refer to the same
        // expression, and any unspecified (`None`) sort options are filled in
        // from the other side as we go. If every pair agrees, the longer of
        // the two (now mutually consistent) requirements is the finer one.
        lhs.inner
            .iter_mut()
            .zip(rhs.inner.iter_mut())
            .all(|(lhs, rhs)| {
                lhs.expr.eq(&rhs.expr)
                    && match (lhs.options, rhs.options) {
                        (Some(lhs_opt), Some(rhs_opt)) => lhs_opt == rhs_opt,
                        (Some(options), None) => {
                            // `rhs` has no preference; adopt `lhs`'s options.
                            rhs.options = Some(options);
                            true
                        }
                        (None, Some(options)) => {
                            // `lhs` has no preference; adopt `rhs`'s options.
                            lhs.options = Some(options);
                            true
                        }
                        (None, None) => true,
                    }
            })
            .then_some(if lhs.len() >= rhs.len() { lhs } else { rhs })
    }
587 | | |
    /// We substitute the ordering according to input expression type; this is a
    /// simplified version. In this case, we just substitute when the expression
    /// satisfies the following condition:
    /// I. just have one column and is a CAST expression
    /// TODO: Add one-to-ones analysis for monotonic ScalarFunctions.
    /// TODO: we could precompute all the scenario that is computable, for example: atan(x + 1000) should also be substituted if
    /// x is DESC or ASC
    /// After substitution, we may generate more than 1 `LexOrdering`. As an example,
    /// `[a ASC, b ASC]` will turn into `[a ASC, b ASC], [CAST(a) ASC, b ASC]` when projection expressions `a, b, CAST(a)` is applied.
    pub fn substitute_ordering_component(
        &self,
        mapping: &ProjectionMapping,
        sort_expr: &[PhysicalSortExpr],
    ) -> Result<Vec<Vec<PhysicalSortExpr>>> {
        // For each sort expression, gather all equivalent forms (itself plus
        // any valid substitutions found in the projection mapping):
        let new_orderings = sort_expr
            .iter()
            .map(|sort_expr| {
                // Projection sources that reference this sort expression:
                let referring_exprs: Vec<_> = mapping
                    .iter()
                    .map(|(source, _target)| source)
                    .filter(|source| expr_refers(source, &sort_expr.expr))
                    .cloned()
                    .collect();
                // The original expression is always a valid alternative:
                let mut res = vec![sort_expr.clone()];
                // TODO: Add one-to-ones analysis for ScalarFunctions.
                for r_expr in referring_exprs {
                    // we check whether this expression is substitutable or not
                    if let Some(cast_expr) = r_expr.as_any().downcast_ref::<CastExpr>() {
                        // we need to know whether the Cast Expr matches or not
                        let expr_type = sort_expr.expr.data_type(&self.schema)?;
                        // Only a widening cast of the same expression preserves
                        // the ordering, so only then is substitution valid:
                        if cast_expr.expr.eq(&sort_expr.expr)
                            && cast_expr.is_bigger_cast(expr_type)
                        {
                            res.push(PhysicalSortExpr {
                                expr: Arc::clone(&r_expr),
                                options: sort_expr.options,
                            });
                        }
                    }
                }
                Ok(res)
            })
            .collect::<Result<Vec<_>>>()?;
        // Generate all valid orderings, given substituted expressions.
        let res = new_orderings
            .into_iter()
            .multi_cartesian_product()
            .collect::<Vec<_>>();
        Ok(res)
    }
637 | | |
638 | | /// In projection, supposed we have a input function 'A DESC B DESC' and the output shares the same expression |
639 | | /// with A and B, we could surely use the ordering of the original ordering, However, if the A has been changed, |
640 | | /// for example, A-> Cast(A, Int64) or any other form, it is invalid if we continue using the original ordering |
641 | | /// Since it would cause bug in dependency constructions, we should substitute the input order in order to get correct |
642 | | /// dependency map, happen in issue 8838: <https://github.com/apache/datafusion/issues/8838> |
643 | 2 | pub fn substitute_oeq_class(&mut self, mapping: &ProjectionMapping) -> Result<()> { |
644 | 2 | let orderings = &self.oeq_class.orderings; |
645 | 2 | let new_order = orderings |
646 | 2 | .iter() |
647 | 2 | .map(|order| self.substitute_ordering_component(mapping, order)1 ) |
648 | 2 | .collect::<Result<Vec<_>>>()?0 ; |
649 | 2 | let new_order = new_order.into_iter().flatten().collect(); |
650 | 2 | self.oeq_class = OrderingEquivalenceClass::new(new_order); |
651 | 2 | Ok(()) |
652 | 2 | } |
    /// Projects argument `expr` according to `projection_mapping`, taking
    /// equivalences into account.
    ///
    /// For example, assume that columns `a` and `c` are always equal, and that
    /// `projection_mapping` encodes following mapping:
    ///
    /// ```text
    /// a -> a1
    /// b -> b1
    /// ```
    ///
    /// Then, this function projects `a + b` to `Some(a1 + b1)`, `c + b` to
    /// `Some(a1 + b1)` and `d` to `None`, meaning that it cannot be projected.
    /// Delegates to the equivalence group's projection logic.
    pub fn project_expr(
        &self,
        expr: &Arc<dyn PhysicalExpr>,
        projection_mapping: &ProjectionMapping,
    ) -> Option<Arc<dyn PhysicalExpr>> {
        self.eq_group.project_expr(projection_mapping, expr)
    }
673 | | |
    /// Constructs a dependency map based on existing orderings referred to in
    /// the projection.
    ///
    /// This function analyzes the orderings in the normalized order-equivalence
    /// class and builds a dependency map. The dependency map captures relationships
    /// between expressions within the orderings, helping to identify dependencies
    /// and construct valid projected orderings during projection operations.
    ///
    /// # Parameters
    ///
    /// - `mapping`: A reference to the `ProjectionMapping` that defines the
    ///   relationship between source and target expressions.
    ///
    /// # Returns
    ///
    /// A [`DependencyMap`] representing the dependency map, where each
    /// [`DependencyNode`] contains dependencies for the key [`PhysicalSortExpr`].
    ///
    /// # Example
    ///
    /// Assume we have two equivalent orderings: `[a ASC, b ASC]` and `[a ASC, c ASC]`,
    /// and the projection mapping is `[a -> a_new, b -> b_new, b + c -> b + c]`.
    /// Then, the dependency map will be:
    ///
    /// ```text
    /// a ASC: Node {Some(a_new ASC), HashSet{}}
    /// b ASC: Node {Some(b_new ASC), HashSet{a ASC}}
    /// c ASC: Node {None, HashSet{a ASC}}
    /// ```
    fn construct_dependency_map(&self, mapping: &ProjectionMapping) -> DependencyMap {
        let mut dependency_map = IndexMap::new();
        for ordering in self.normalized_oeq_class().iter() {
            // Walk each lexicographic ordering left to right; each sort
            // expression depends on the one immediately before it.
            for (idx, sort_expr) in ordering.iter().enumerate() {
                // Try to express this sort expression in terms of the output
                // (projected) schema; `None` means it cannot be projected.
                let target_sort_expr =
                    self.project_expr(&sort_expr.expr, mapping).map(|expr| {
                        PhysicalSortExpr {
                            expr,
                            options: sort_expr.options,
                        }
                    });
                let is_projected = target_sort_expr.is_some();
                if is_projected
                    || mapping
                        .iter()
                        .any(|(source, _)| expr_refers(source, &sort_expr.expr))
                {
                    // Previous ordering is a dependency. Note that there is no,
                    // dependency for a leading ordering (i.e. the first sort
                    // expression): `checked_sub` yields `None` at `idx == 0`.
                    let dependency = idx.checked_sub(1).map(|a| &ordering[a]);
                    // Add sort expressions that can be projected or referred to
                    // by any of the projection expressions to the dependency map:
                    dependency_map
                        .entry(sort_expr.clone())
                        .or_insert_with(|| DependencyNode {
                            target_sort_expr: target_sort_expr.clone(),
                            dependencies: IndexSet::new(),
                        })
                        .insert_dependency(dependency);
                }
                if !is_projected {
                    // If we can not project, stop constructing the dependency
                    // map as remaining dependencies will be invalid after projection.
                    break;
                }
            }
        }
        dependency_map
    }
743 | | |
744 | | /// Returns a new `ProjectionMapping` where source expressions are normalized. |
745 | | /// |
746 | | /// This normalization ensures that source expressions are transformed into a |
747 | | /// consistent representation. This is beneficial for algorithms that rely on |
748 | | /// exact equalities, as it allows for more precise and reliable comparisons. |
749 | | /// |
750 | | /// # Parameters |
751 | | /// |
752 | | /// - `mapping`: A reference to the original `ProjectionMapping` to be normalized. |
753 | | /// |
754 | | /// # Returns |
755 | | /// |
756 | | /// A new `ProjectionMapping` with normalized source expressions. |
757 | 52 | fn normalized_mapping(&self, mapping: &ProjectionMapping) -> ProjectionMapping { |
758 | 52 | // Construct the mapping where source expressions are normalized. In this way |
759 | 52 | // In the algorithms below we can work on exact equalities |
760 | 52 | ProjectionMapping { |
761 | 52 | map: mapping |
762 | 52 | .iter() |
763 | 61 | .map(|(source, target)| { |
764 | 61 | let normalized_source = |
765 | 61 | self.eq_group.normalize_expr(Arc::clone(source)); |
766 | 61 | (normalized_source, Arc::clone(target)) |
767 | 61 | }) |
768 | 52 | .collect(), |
769 | 52 | } |
770 | 52 | } |
771 | | |
    /// Computes projected orderings based on a given projection mapping.
    ///
    /// This function takes a `ProjectionMapping` and computes the possible
    /// orderings for the projected expressions. It considers dependencies
    /// between expressions and generates valid orderings according to the
    /// specified sort properties.
    ///
    /// # Parameters
    ///
    /// - `mapping`: A reference to the `ProjectionMapping` that defines the
    ///   relationship between source and target expressions.
    ///
    /// # Returns
    ///
    /// A vector of `LexOrdering` containing all valid orderings after projection.
    fn projected_orderings(&self, mapping: &ProjectionMapping) -> Vec<LexOrdering> {
        // Normalize sources first so the dependency analysis below can use
        // exact equality (see `normalized_mapping`).
        let mapping = self.normalized_mapping(mapping);

        // Get dependency map for existing orderings:
        let dependency_map = self.construct_dependency_map(&mapping);
        // Pass 1 (lazy): for each projection expression, find orderings it
        // inherits from the sort expressions it refers to.
        let orderings = mapping.iter().flat_map(|(source, target)| {
            referred_dependencies(&dependency_map, source)
                .into_iter()
                .filter_map(|relevant_deps| {
                    // Keep only dependency sets under which `source` is ordered.
                    if let Ok(SortProperties::Ordered(options)) =
                        get_expr_properties(source, &relevant_deps, &self.schema)
                            .map(|prop| prop.sort_properties)
                    {
                        Some((options, relevant_deps))
                    } else {
                        // Do not consider unordered cases
                        None
                    }
                })
                .flat_map(|(options, relevant_deps)| {
                    let sort_expr = PhysicalSortExpr {
                        expr: Arc::clone(target),
                        options,
                    };
                    // Generate dependent orderings (i.e. prefixes for `sort_expr`):
                    let mut dependency_orderings =
                        generate_dependency_orderings(&relevant_deps, &dependency_map);
                    // Append `sort_expr` to the dependent orderings:
                    for ordering in dependency_orderings.iter_mut() {
                        ordering.push(sort_expr.clone());
                    }
                    dependency_orderings
                })
        });

        // Pass 2 (lazy): add valid projected orderings. For example, if existing
        // ordering is `a + b` and projection is `[a -> a_new, b -> b_new]`, we
        // need to preserve `a_new + b_new` as ordered. Please note that `a_new`
        // and `b_new` themselves need not be ordered. Such dependencies cannot
        // be deduced via the pass above.
        let projected_orderings = dependency_map.iter().flat_map(|(sort_expr, node)| {
            let mut prefixes = construct_prefix_orderings(sort_expr, &dependency_map);
            if prefixes.is_empty() {
                // If prefix is empty, there is no dependency. Insert
                // empty ordering:
                prefixes = vec![vec![]];
            }
            // Append current ordering on top its dependencies:
            for ordering in prefixes.iter_mut() {
                if let Some(target) = &node.target_sort_expr {
                    ordering.push(target.clone())
                }
            }
            prefixes
        });

        // Simplify each ordering by removing redundant sections:
        orderings
            .chain(projected_orderings)
            .map(collapse_lex_ordering)
            .collect()
    }
849 | | |
    /// Projects constants based on the provided `ProjectionMapping`.
    ///
    /// This function takes a `ProjectionMapping` and identifies/projects
    /// constants based on the existing constants and the mapping. It ensures
    /// that constants are appropriately propagated through the projection.
    ///
    /// # Arguments
    ///
    /// - `mapping`: A reference to a `ProjectionMapping` representing the
    ///   mapping of source expressions to target expressions in the projection.
    ///
    /// # Returns
    ///
    /// Returns a `Vec<ConstExpr>` containing the projected constants.
    fn projected_constants(&self, mapping: &ProjectionMapping) -> Vec<ConstExpr> {
        // First, project existing constants. For example, assume that `a + b`
        // is known to be constant. If the projection were `a as a_new`, `b as b_new`,
        // then we would project constant `a + b` as `a_new + b_new`.
        // NOTE(review): `ConstExpr::map` presumably drops the constant (yields
        // nothing) when its expression cannot be projected — confirm against
        // `ConstExpr::map`'s definition.
        let mut projected_constants = self
            .constants
            .iter()
            .flat_map(|const_expr| {
                const_expr.map(|expr| self.eq_group.project_expr(mapping, expr))
            })
            .collect::<Vec<_>>();
        // Add projection expressions that are known to be constant:
        for (source, target) in mapping.iter() {
            if self.is_expr_constant(source)
                && !const_exprs_contains(&projected_constants, target)
            {
                // Expression evaluates to single value; it is therefore also
                // constant across partitions.
                projected_constants
                    .push(ConstExpr::from(target).with_across_partitions(true));
            }
        }
        projected_constants
    }
887 | | |
888 | | /// Projects the equivalences within according to `projection_mapping` |
889 | | /// and `output_schema`. |
890 | 52 | pub fn project( |
891 | 52 | &self, |
892 | 52 | projection_mapping: &ProjectionMapping, |
893 | 52 | output_schema: SchemaRef, |
894 | 52 | ) -> Self { |
895 | 52 | let projected_constants = self.projected_constants(projection_mapping); |
896 | 52 | let projected_eq_group = self.eq_group.project(projection_mapping); |
897 | 52 | let projected_orderings = self.projected_orderings(projection_mapping); |
898 | 52 | Self { |
899 | 52 | eq_group: projected_eq_group, |
900 | 52 | oeq_class: OrderingEquivalenceClass::new(projected_orderings), |
901 | 52 | constants: projected_constants, |
902 | 52 | schema: output_schema, |
903 | 52 | } |
904 | 52 | } |
905 | | |
    /// Returns the longest (potentially partial) permutation satisfying the
    /// existing ordering. For example, if we have the equivalent orderings
    /// `[a ASC, b ASC]` and `[c DESC]`, with `exprs` containing `[c, b, a, d]`,
    /// then this function returns `([a ASC, b ASC, c DESC], [2, 1, 0])`.
    /// This means that the specification `[a ASC, b ASC, c DESC]` is satisfied
    /// by the existing ordering, and `[a, b, c]` resides at indices: `2, 1, 0`
    /// inside the argument `exprs` (respectively). For the mathematical
    /// definition of "partial permutation", see:
    ///
    /// <https://en.wikipedia.org/wiki/Permutation#k-permutations_of_n>
    pub fn find_longest_permutation(
        &self,
        exprs: &[Arc<dyn PhysicalExpr>],
    ) -> (LexOrdering, Vec<usize>) {
        // Work on a clone because the algorithm mutates the properties
        // (it inserts pseudo-constants) as it proceeds.
        let mut eq_properties = self.clone();
        let mut result = vec![];
        // The algorithm is as follows:
        // - Iterate over all the expressions and insert ordered expressions
        //   into the result.
        // - Treat inserted expressions as constants (i.e. add them as constants
        //   to the state).
        // - Continue the above procedure until no expression is inserted; i.e.
        //   the algorithm reaches a fixed point.
        // This algorithm should reach a fixed point in at most `exprs.len()`
        // iterations.
        let mut search_indices = (0..exprs.len()).collect::<IndexSet<_>>();
        for _idx in 0..exprs.len() {
            // Get ordered expressions with their indices.
            let ordered_exprs = search_indices
                .iter()
                .flat_map(|&idx| {
                    let ExprProperties {
                        sort_properties, ..
                    } = eq_properties.get_expr_properties(Arc::clone(&exprs[idx]));
                    match sort_properties {
                        SortProperties::Ordered(options) => Some((
                            PhysicalSortExpr {
                                expr: Arc::clone(&exprs[idx]),
                                options,
                            },
                            idx,
                        )),
                        SortProperties::Singleton => {
                            // Assign default ordering to constant expressions
                            let options = SortOptions::default();
                            Some((
                                PhysicalSortExpr {
                                    expr: Arc::clone(&exprs[idx]),
                                    options,
                                },
                                idx,
                            ))
                        }
                        SortProperties::Unordered => None,
                    }
                })
                .collect::<Vec<_>>();
            // We reached a fixed point, exit.
            if ordered_exprs.is_empty() {
                break;
            }
            // Remove indices that have an ordering from `search_indices`, and
            // treat ordered expressions as constants in subsequent iterations.
            // We can do this because the "next" key only matters in a lexicographical
            // ordering when the keys to its left have the same values.
            //
            // Note that these expressions are not properly "constants". This is just
            // an implementation strategy confined to this function.
            for (PhysicalSortExpr { expr, .. }, idx) in &ordered_exprs {
                eq_properties =
                    eq_properties.with_constants(std::iter::once(ConstExpr::from(expr)));
                // `shift_remove` preserves the relative order of remaining indices.
                search_indices.shift_remove(idx);
            }
            // Add new ordered section to the state.
            result.extend(ordered_exprs);
        }
        result.into_iter().unzip()
    }
984 | | |
985 | | /// This function determines whether the provided expression is constant |
986 | | /// based on the known constants. |
987 | | /// |
988 | | /// # Arguments |
989 | | /// |
990 | | /// - `expr`: A reference to a `Arc<dyn PhysicalExpr>` representing the |
991 | | /// expression to be checked. |
992 | | /// |
993 | | /// # Returns |
994 | | /// |
995 | | /// Returns `true` if the expression is constant according to equivalence |
996 | | /// group, `false` otherwise. |
997 | 546 | pub fn is_expr_constant(&self, expr: &Arc<dyn PhysicalExpr>) -> bool { |
998 | 546 | // As an example, assume that we know columns `a` and `b` are constant. |
999 | 546 | // Then, `a`, `b` and `a + b` will all return `true` whereas `c` will |
1000 | 546 | // return `false`. |
1001 | 546 | let const_exprs = self |
1002 | 546 | .constants |
1003 | 546 | .iter() |
1004 | 546 | .map(|const_expr| Arc::clone(const_expr.expr())190 ); |
1005 | 546 | let normalized_constants = self.eq_group.normalize_exprs(const_exprs); |
1006 | 546 | let normalized_expr = self.eq_group.normalize_expr(Arc::clone(expr)); |
1007 | 546 | is_constant_recurse(&normalized_constants, &normalized_expr) |
1008 | 546 | } |
1009 | | |
1010 | | /// Retrieves the properties for a given physical expression. |
1011 | | /// |
1012 | | /// This function constructs an [`ExprProperties`] object for the given |
1013 | | /// expression, which encapsulates information about the expression's |
1014 | | /// properties, including its [`SortProperties`] and [`Interval`]. |
1015 | | /// |
1016 | | /// # Parameters |
1017 | | /// |
1018 | | /// - `expr`: An `Arc<dyn PhysicalExpr>` representing the physical expression |
1019 | | /// for which ordering information is sought. |
1020 | | /// |
1021 | | /// # Returns |
1022 | | /// |
1023 | | /// Returns an [`ExprProperties`] object containing the ordering and range |
1024 | | /// information for the given expression. |
1025 | 467 | pub fn get_expr_properties(&self, expr: Arc<dyn PhysicalExpr>) -> ExprProperties { |
1026 | 467 | ExprPropertiesNode::new_unknown(expr) |
1027 | 467 | .transform_up(|expr| update_properties(expr, self)) |
1028 | 467 | .data() |
1029 | 467 | .map(|node| node.data) |
1030 | 467 | .unwrap_or(ExprProperties::new_unknown()) |
1031 | 467 | } |
1032 | | |
    /// Transforms this `EquivalenceProperties` into a new `EquivalenceProperties`
    /// by mapping columns in the original schema to columns in the new schema
    /// by index.
    ///
    /// # Errors
    ///
    /// Returns a plan error if the schemas are not aligned, i.e. they do not
    /// have the same number of columns or the data types at some index differ.
    pub fn with_new_schema(self, schema: SchemaRef) -> Result<Self> {
        // The new schema and the original schema is aligned when they have the
        // same number of columns, and fields at the same index have the same
        // type in both schemas.
        let schemas_aligned = (self.schema.fields.len() == schema.fields.len())
            && self
                .schema
                .fields
                .iter()
                .zip(schema.fields.iter())
                .all(|(lhs, rhs)| lhs.data_type().eq(rhs.data_type()));
        if !schemas_aligned {
            // Rewriting equivalence properties in terms of new schema is not
            // safe when schemas are not aligned:
            return plan_err!(
                "Cannot rewrite old_schema:{:?} with new schema: {:?}",
                self.schema,
                schema
            );
        }
        // Rewrite constants according to new schema, preserving each constant's
        // across-partitions flag:
        let new_constants = self
            .constants
            .into_iter()
            .map(|const_expr| {
                let across_partitions = const_expr.across_partitions();
                let new_const_expr = with_new_schema(const_expr.owned_expr(), &schema)?;
                Ok(ConstExpr::new(new_const_expr)
                    .with_across_partitions(across_partitions))
            })
            .collect::<Result<Vec<_>>>()?;

        // Rewrite orderings according to new schema:
        let mut new_orderings = vec![];
        for ordering in self.oeq_class.orderings {
            let new_ordering = ordering
                .into_iter()
                .map(|mut sort_expr| {
                    sort_expr.expr = with_new_schema(sort_expr.expr, &schema)?;
                    Ok(sort_expr)
                })
                .collect::<Result<_>>()?;
            new_orderings.push(new_ordering);
        }

        // Rewrite equivalence classes according to the new schema:
        let mut eq_classes = vec![];
        for eq_class in self.eq_group.classes {
            let new_eq_exprs = eq_class
                .into_vec()
                .into_iter()
                .map(|expr| with_new_schema(expr, &schema))
                .collect::<Result<_>>()?;
            eq_classes.push(EquivalenceClass::new(new_eq_exprs));
        }

        // Construct the resulting equivalence properties:
        let mut result = EquivalenceProperties::new(schema);
        result.constants = new_constants;
        result.add_new_orderings(new_orderings);
        result.add_equivalence_group(EquivalenceGroup::new(eq_classes));

        Ok(result)
    }
1100 | | } |
1101 | | |
1102 | | /// More readable display version of the `EquivalenceProperties`. |
1103 | | /// |
1104 | | /// Format: |
1105 | | /// ```text |
1106 | | /// order: [[a ASC, b ASC], [a ASC, c ASC]], eq: [[a = b], [a = c]], const: [a = 1] |
1107 | | /// ``` |
1108 | | impl Display for EquivalenceProperties { |
1109 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
1110 | 0 | if self.eq_group.is_empty() |
1111 | 0 | && self.oeq_class.is_empty() |
1112 | 0 | && self.constants.is_empty() |
1113 | | { |
1114 | 0 | return write!(f, "No properties"); |
1115 | 0 | } |
1116 | 0 | if !self.oeq_class.is_empty() { |
1117 | 0 | write!(f, "order: {}", self.oeq_class)?; |
1118 | 0 | } |
1119 | 0 | if !self.eq_group.is_empty() { |
1120 | 0 | write!(f, ", eq: {}", self.eq_group)?; |
1121 | 0 | } |
1122 | 0 | if !self.constants.is_empty() { |
1123 | 0 | write!(f, ", const: [")?; |
1124 | 0 | let mut iter = self.constants.iter(); |
1125 | 0 | if let Some(c) = iter.next() { |
1126 | 0 | write!(f, "{}", c)?; |
1127 | 0 | } |
1128 | 0 | for c in iter { |
1129 | 0 | write!(f, ", {}", c)?; |
1130 | | } |
1131 | 0 | write!(f, "]")?; |
1132 | 0 | } |
1133 | 0 | Ok(()) |
1134 | 0 | } |
1135 | | } |
1136 | | |
/// Calculates the properties of a given [`ExprPropertiesNode`].
///
/// Order information can be retrieved as:
/// - If it is a leaf node, we directly find the order of the node by looking
///   at the given sort expression and equivalence properties if it is a `Column`
///   leaf, or we mark it as unordered. In the case of a `Literal` leaf, we mark
///   it as singleton so that it can cooperate with all ordered columns.
/// - If it is an intermediate node, the children states matter. Each `PhysicalExpr`
///   and operator has its own rules on how to propagate the children orderings.
///   However, before we engage in recursion, we check whether this intermediate
///   node directly matches with the sort expression. If there is a match, the
///   sort expression emerges at that node immediately, discarding the recursive
///   result coming from its children.
///
/// Range information is calculated as:
/// - If it is a `Literal` node, we set the range as a point value. If it is a
///   `Column` node, we set the datatype of the range, but cannot give an interval
///   for the range, yet.
/// - If it is an intermediate node, the children states matter. Each `PhysicalExpr`
///   and operator has its own rules on how to propagate the children range.
fn update_properties(
    mut node: ExprPropertiesNode,
    eq_properties: &EquivalenceProperties,
) -> Result<Transformed<ExprPropertiesNode>> {
    // First, try to gather the information from the children:
    if !node.expr.children().is_empty() {
        // We have an intermediate (non-leaf) node, account for its children:
        let children_props = node.children.iter().map(|c| c.data.clone()).collect_vec();
        node.data = node.expr.get_properties(&children_props)?;
    } else if node.expr.as_any().is::<Literal>() {
        // We have a Literal, which is one of the two possible leaf node types:
        node.data = node.expr.get_properties(&[])?;
    } else if node.expr.as_any().is::<Column>() {
        // We have a Column, which is the other possible leaf node type:
        node.data.range =
            Interval::make_unbounded(&node.expr.data_type(eq_properties.schema())?)?
    }
    // Now, check what we know about orderings. A direct match against the
    // known constants or orderings overrides whatever the children implied:
    let normalized_expr = eq_properties
        .eq_group
        .normalize_expr(Arc::clone(&node.expr));
    if eq_properties.is_expr_constant(&normalized_expr) {
        node.data.sort_properties = SortProperties::Singleton;
    } else if let Some(options) = eq_properties
        .normalized_oeq_class()
        .get_options(&normalized_expr)
    {
        node.data.sort_properties = SortProperties::Ordered(options);
    }
    Ok(Transformed::yes(node))
}
1188 | | |
1189 | | /// This function determines whether the provided expression is constant |
1190 | | /// based on the known constants. |
1191 | | /// |
1192 | | /// # Arguments |
1193 | | /// |
1194 | | /// - `constants`: A `&[Arc<dyn PhysicalExpr>]` containing expressions known to |
1195 | | /// be a constant. |
1196 | | /// - `expr`: A reference to a `Arc<dyn PhysicalExpr>` representing the expression |
1197 | | /// to check. |
1198 | | /// |
1199 | | /// # Returns |
1200 | | /// |
1201 | | /// Returns `true` if the expression is constant according to equivalence |
1202 | | /// group, `false` otherwise. |
1203 | 546 | fn is_constant_recurse( |
1204 | 546 | constants: &[Arc<dyn PhysicalExpr>], |
1205 | 546 | expr: &Arc<dyn PhysicalExpr>, |
1206 | 546 | ) -> bool { |
1207 | 546 | if physical_exprs_contains(constants, expr) || expr.as_any().is::<Literal>()545 { |
1208 | 11 | return true; |
1209 | 535 | } |
1210 | 535 | let children = expr.children(); |
1211 | 535 | !children.is_empty() && children.iter().all(0 |c| is_constant_recurse(constants, c)0 ) |
1212 | 546 | } |
1213 | | |
1214 | | /// This function examines whether a referring expression directly refers to a |
1215 | | /// given referred expression or if any of its children in the expression tree |
1216 | | /// refer to the specified expression. |
1217 | | /// |
1218 | | /// # Parameters |
1219 | | /// |
1220 | | /// - `referring_expr`: A reference to the referring expression (`Arc<dyn PhysicalExpr>`). |
1221 | | /// - `referred_expr`: A reference to the referred expression (`Arc<dyn PhysicalExpr>`) |
1222 | | /// |
1223 | | /// # Returns |
1224 | | /// |
1225 | | /// A boolean value indicating whether `referring_expr` refers (needs it to evaluate its result) |
1226 | | /// `referred_expr` or not. |
1227 | 6 | fn expr_refers( |
1228 | 6 | referring_expr: &Arc<dyn PhysicalExpr>, |
1229 | 6 | referred_expr: &Arc<dyn PhysicalExpr>, |
1230 | 6 | ) -> bool { |
1231 | 6 | referring_expr.eq(referred_expr) |
1232 | 4 | || referring_expr |
1233 | 4 | .children() |
1234 | 4 | .iter() |
1235 | 4 | .any(|child| expr_refers(child, referred_expr)0 )2 |
1236 | 6 | } |
1237 | | |
/// This function analyzes the dependency map to collect referred dependencies for
/// a given source expression.
///
/// # Parameters
///
/// - `dependency_map`: A reference to the `DependencyMap` where each
///   `PhysicalSortExpr` is associated with a `DependencyNode`.
/// - `source`: A reference to the source expression (`Arc<dyn PhysicalExpr>`)
///   for which relevant dependencies need to be identified.
///
/// # Returns
///
/// A `Vec<Dependencies>` containing the dependencies for the given source
/// expression. These dependencies are expressions that are referred to by
/// the source expression based on the provided dependency map.
fn referred_dependencies(
    dependency_map: &DependencyMap,
    source: &Arc<dyn PhysicalExpr>,
) -> Vec<Dependencies> {
    // Associate `PhysicalExpr`s with `PhysicalSortExpr`s that contain them.
    // An expression may appear with several sort options (e.g. `a ASC` and
    // `a DESC`), so each key maps to a set of sort expressions.
    let mut expr_to_sort_exprs = IndexMap::<ExprWrapper, Dependencies>::new();
    for sort_expr in dependency_map
        .keys()
        .filter(|sort_expr| expr_refers(source, &sort_expr.expr))
    {
        let key = ExprWrapper(Arc::clone(&sort_expr.expr));
        expr_to_sort_exprs
            .entry(key)
            .or_default()
            .insert(sort_expr.clone());
    }

    // Generate all valid dependencies for the source. For example, if the source
    // is `a + b` and the map is `[a -> (a ASC, a DESC), b -> (b ASC)]`, we get
    // `vec![HashSet(a ASC, b ASC), HashSet(a DESC, b ASC)]`.
    // The cartesian product picks exactly one sort option per expression.
    expr_to_sort_exprs
        .values()
        .multi_cartesian_product()
        .map(|referred_deps| referred_deps.into_iter().cloned().collect())
        .collect()
}
1279 | | |
1280 | | /// This function retrieves the dependencies of the given relevant sort expression |
1281 | | /// from the given dependency map. It then constructs prefix orderings by recursively |
1282 | | /// analyzing the dependencies and include them in the orderings. |
1283 | | /// |
1284 | | /// # Parameters |
1285 | | /// |
1286 | | /// - `relevant_sort_expr`: A reference to the relevant sort expression |
1287 | | /// (`PhysicalSortExpr`) for which prefix orderings are to be constructed. |
1288 | | /// - `dependency_map`: A reference to the `DependencyMap` containing dependencies. |
1289 | | /// |
1290 | | /// # Returns |
1291 | | /// |
1292 | | /// A vector of prefix orderings (`Vec<LexOrdering>`) based on the given relevant |
1293 | | /// sort expression and its dependencies. |
1294 | 2 | fn construct_prefix_orderings( |
1295 | 2 | relevant_sort_expr: &PhysicalSortExpr, |
1296 | 2 | dependency_map: &DependencyMap, |
1297 | 2 | ) -> Vec<LexOrdering> { |
1298 | 2 | let mut dep_enumerator = DependencyEnumerator::new(); |
1299 | 2 | dependency_map[relevant_sort_expr] |
1300 | 2 | .dependencies |
1301 | 2 | .iter() |
1302 | 2 | .flat_map(|dep| dep_enumerator.construct_orderings(dep, dependency_map)0 ) |
1303 | 2 | .collect() |
1304 | 2 | } |
1305 | | |
/// Generates all possible orderings where dependencies are satisfied for the
/// current projection expression.
///
/// # Example
/// If `dependences` is `a + b ASC` and the dependency map holds dependencies
/// * `a ASC` --> `[c ASC]`
/// * `b ASC` --> `[d DESC]`,
///
/// This function generates these two sort orders
/// * `[c ASC, d DESC, a + b ASC]`
/// * `[d DESC, c ASC, a + b ASC]`
///
/// # Parameters
///
/// * `dependencies` - Set of relevant expressions.
/// * `dependency_map` - Map of dependencies for expressions that may appear in `dependencies`
///
/// # Returns
///
/// A vector of lexical orderings (`Vec<LexOrdering>`) representing all valid orderings
/// based on the given dependencies.
fn generate_dependency_orderings(
    dependencies: &Dependencies,
    dependency_map: &DependencyMap,
) -> Vec<LexOrdering> {
    // Construct all the valid prefix orderings for each expression appearing
    // in the projection. Dependencies with no prefixes impose no constraint
    // and are skipped:
    let relevant_prefixes = dependencies
        .iter()
        .flat_map(|dep| {
            let prefixes = construct_prefix_orderings(dep, dependency_map);
            (!prefixes.is_empty()).then_some(prefixes)
        })
        .collect::<Vec<_>>();

    // No dependency, dependent is a leading ordering.
    if relevant_prefixes.is_empty() {
        // Return an empty ordering:
        return vec![vec![]];
    }

    // Pick one prefix ordering per dependency (cartesian product), then emit
    // every permutation of the chosen prefixes, since independent prefixes
    // may be interleaved in any relative order:
    relevant_prefixes
        .into_iter()
        .multi_cartesian_product()
        .flat_map(|prefix_orderings| {
            prefix_orderings
                .iter()
                .permutations(prefix_orderings.len())
                .map(|prefixes| prefixes.into_iter().flatten().cloned().collect())
                .collect::<Vec<_>>()
        })
        .collect()
}
1359 | | |
/// This function examines the given expression and its properties to determine
/// the ordering properties of the expression. The range knowledge is not utilized
/// yet in the scope of this function.
///
/// # Parameters
///
/// - `expr`: A reference to the source expression (`Arc<dyn PhysicalExpr>`) for
///   which ordering properties need to be determined.
/// - `dependencies`: A reference to `Dependencies`, containing sort expressions
///   referred to by `expr`.
/// - `schema`: A reference to the schema which the `expr` columns refer.
///
/// # Returns
///
/// An `ExprProperties` carrying the ordering information of the given
/// expression, or an error if a data type cannot be resolved against `schema`.
fn get_expr_properties(
    expr: &Arc<dyn PhysicalExpr>,
    dependencies: &Dependencies,
    schema: &SchemaRef,
) -> Result<ExprProperties> {
    // NOTE: the order of these checks matters; an exact dependency match
    // takes precedence over the leaf-type fallbacks below.
    if let Some(column_order) = dependencies.iter().find(|&order| expr.eq(&order.expr)) {
        // If exact match is found, return its ordering.
        Ok(ExprProperties {
            sort_properties: SortProperties::Ordered(column_order.options),
            range: Interval::make_unbounded(&expr.data_type(schema)?)?,
        })
    } else if expr.as_any().downcast_ref::<Column>().is_some() {
        // A bare column with no matching dependency has no known ordering:
        Ok(ExprProperties {
            sort_properties: SortProperties::Unordered,
            range: Interval::make_unbounded(&expr.data_type(schema)?)?,
        })
    } else if let Some(literal) = expr.as_any().downcast_ref::<Literal>() {
        // Literals are constant (singleton) with a point-valued range:
        Ok(ExprProperties {
            sort_properties: SortProperties::Singleton,
            range: Interval::try_new(literal.value().clone(), literal.value().clone())?,
        })
    } else {
        // Find orderings of its children
        let child_states = expr
            .children()
            .iter()
            .map(|child| get_expr_properties(child, dependencies, schema))
            .collect::<Result<Vec<_>>>()?;
        // Calculate expression ordering using ordering of its children.
        expr.get_properties(&child_states)
    }
}
1407 | | |
1408 | | /// Represents a node in the dependency map used to construct projected orderings. |
1409 | | /// |
1410 | | /// A `DependencyNode` contains information about a particular sort expression, |
1411 | | /// including its target sort expression and a set of dependencies on other sort |
1412 | | /// expressions. |
1413 | | /// |
1414 | | /// # Fields |
1415 | | /// |
1416 | | /// - `target_sort_expr`: An optional `PhysicalSortExpr` representing the target |
1417 | | /// sort expression associated with the node. It is `None` if the sort expression |
1418 | | /// cannot be projected. |
1419 | | /// - `dependencies`: A [`Dependencies`] containing dependencies on other sort |
1420 | | /// expressions that are referred to by the target sort expression. |
1421 | | #[derive(Debug, Clone, PartialEq, Eq)] |
1422 | | struct DependencyNode { |
1423 | | target_sort_expr: Option<PhysicalSortExpr>, |
1424 | | dependencies: Dependencies, |
1425 | | } |
1426 | | |
1427 | | impl DependencyNode { |
1428 | | /// Inserts a dependency into the state, if one exists. |
1429 | 1 | fn insert_dependency(&mut self, dependency: Option<&PhysicalSortExpr>) { |
1430 | 1 | if let Some(dep0 ) = dependency { |
1431 | 0 | self.dependencies.insert(dep.clone()); |
1432 | 1 | } |
1433 | 1 | } |
1434 | | } |
1435 | | |
1436 | | // Using `IndexMap` and `IndexSet` makes sure to generate consistent results across different executions for the same query. |
1437 | | // We could have used `HashSet`, `HashMap` in place of them without any loss of functionality. |
1438 | | // As an example, if existing orderings are `[a ASC, b ASC]`, `[c ASC]` for output ordering |
1439 | | // both `[a ASC, b ASC, c ASC]` and `[c ASC, a ASC, b ASC]` are valid (e.g. concatenated version of the alternative orderings). |
1440 | | // When using `HashSet` or `HashMap`, the result is not guaranteed to be consistent among the two possible results in the example above. |
1441 | | type DependencyMap = IndexMap<PhysicalSortExpr, DependencyNode>; |
1442 | | type Dependencies = IndexSet<PhysicalSortExpr>; |
1443 | | |
1444 | | /// Contains a mapping of all dependencies we have processed for each sort expr |
1445 | | struct DependencyEnumerator<'a> { |
1446 | | /// Maps `expr` --> `[exprs]` that have previously been processed |
1447 | | seen: IndexMap<&'a PhysicalSortExpr, IndexSet<&'a PhysicalSortExpr>>, |
1448 | | } |
1449 | | |
1450 | | impl<'a> DependencyEnumerator<'a> { |
1451 | 2 | fn new() -> Self { |
1452 | 2 | Self { |
1453 | 2 | seen: IndexMap::new(), |
1454 | 2 | } |
1455 | 2 | } |
1456 | | |
1457 | | /// Inserts a new dependency. |
1458 | | /// |
1459 | | /// Returns `false` if the dependency was already in the map, |
1460 | | /// or `true` if the dependency was newly inserted. |
1461 | 0 | fn insert( |
1462 | 0 | &mut self, |
1463 | 0 | target: &'a PhysicalSortExpr, |
1464 | 0 | dep: &'a PhysicalSortExpr, |
1465 | 0 | ) -> bool { |
1466 | 0 | self.seen.entry(target).or_default().insert(dep) |
1467 | 0 | } |
1468 | | |
1469 | | /// This function recursively analyzes the dependencies of the given sort |
1470 | | /// expression within the given dependency map to construct lexicographical |
1471 | | /// orderings that include the sort expression and its dependencies. |
1472 | | /// |
1473 | | /// # Parameters |
1474 | | /// |
1475 | | /// - `referred_sort_expr`: A reference to the sort expression (`PhysicalSortExpr`) |
1476 | | /// for which lexicographical orderings satisfying its dependencies are to be |
1477 | | /// constructed. |
1478 | | /// - `dependency_map`: A reference to the `DependencyMap` that contains |
1479 | | /// dependencies for different `PhysicalSortExpr`s. |
1480 | | /// |
1481 | | /// # Returns |
1482 | | /// |
1483 | | /// A vector of lexicographical orderings (`Vec<LexOrdering>`) based on the given |
1484 | | /// sort expression and its dependencies. |
1485 | 0 | fn construct_orderings( |
1486 | 0 | &mut self, |
1487 | 0 | referred_sort_expr: &'a PhysicalSortExpr, |
1488 | 0 | dependency_map: &'a DependencyMap, |
1489 | 0 | ) -> Vec<LexOrdering> { |
1490 | 0 | // We are sure that `referred_sort_expr` is inside `dependency_map`. |
1491 | 0 | let node = &dependency_map[referred_sort_expr]; |
1492 | 0 | // Since we work on intermediate nodes, we are sure `node.target_sort_expr` |
1493 | 0 | // exists. |
1494 | 0 | let target_sort_expr = node.target_sort_expr.as_ref().unwrap(); |
1495 | 0 | if node.dependencies.is_empty() { |
1496 | 0 | return vec![vec![target_sort_expr.clone()]]; |
1497 | 0 | }; |
1498 | 0 |
|
1499 | 0 | node.dependencies |
1500 | 0 | .iter() |
1501 | 0 | .flat_map(|dep| { |
1502 | 0 | let mut orderings = if self.insert(target_sort_expr, dep) { |
1503 | 0 | self.construct_orderings(dep, dependency_map) |
1504 | | } else { |
1505 | 0 | vec![] |
1506 | | }; |
1507 | 0 | for ordering in orderings.iter_mut() { |
1508 | 0 | ordering.push(target_sort_expr.clone()) |
1509 | | } |
1510 | 0 | orderings |
1511 | 0 | }) |
1512 | 0 | .collect() |
1513 | 0 | } |
1514 | | } |
1515 | | |
1516 | | /// Calculate ordering equivalence properties for the given join operation. |
1517 | 1.14k | pub fn join_equivalence_properties( |
1518 | 1.14k | left: EquivalenceProperties, |
1519 | 1.14k | right: EquivalenceProperties, |
1520 | 1.14k | join_type: &JoinType, |
1521 | 1.14k | join_schema: SchemaRef, |
1522 | 1.14k | maintains_input_order: &[bool], |
1523 | 1.14k | probe_side: Option<JoinSide>, |
1524 | 1.14k | on: &[(PhysicalExprRef, PhysicalExprRef)], |
1525 | 1.14k | ) -> EquivalenceProperties { |
1526 | 1.14k | let left_size = left.schema.fields.len(); |
1527 | 1.14k | let mut result = EquivalenceProperties::new(join_schema); |
1528 | 1.14k | result.add_equivalence_group(left.eq_group().join( |
1529 | 1.14k | right.eq_group(), |
1530 | 1.14k | join_type, |
1531 | 1.14k | left_size, |
1532 | 1.14k | on, |
1533 | 1.14k | )); |
1534 | 1.14k | |
1535 | 1.14k | let EquivalenceProperties { |
1536 | 1.14k | constants: left_constants, |
1537 | 1.14k | oeq_class: left_oeq_class, |
1538 | 1.14k | .. |
1539 | 1.14k | } = left; |
1540 | 1.14k | let EquivalenceProperties { |
1541 | 1.14k | constants: right_constants, |
1542 | 1.14k | oeq_class: mut right_oeq_class, |
1543 | 1.14k | .. |
1544 | 1.14k | } = right; |
1545 | 1.14k | match maintains_input_order { |
1546 | 1.14k | [true, false] => { |
1547 | | // In this special case, right side ordering can be prefixed with |
1548 | | // the left side ordering. |
1549 | 54 | if let (Some(JoinSide::Left), JoinType::Inner) = (probe_side, join_type) { |
1550 | 19 | updated_right_ordering_equivalence_class( |
1551 | 19 | &mut right_oeq_class, |
1552 | 19 | join_type, |
1553 | 19 | left_size, |
1554 | 19 | ); |
1555 | 19 | |
1556 | 19 | // Right side ordering equivalence properties should be prepended |
1557 | 19 | // with those of the left side while constructing output ordering |
1558 | 19 | // equivalence properties since stream side is the left side. |
1559 | 19 | // |
1560 | 19 | // For example, if the right side ordering equivalences contain |
1561 | 19 | // `b ASC`, and the left side ordering equivalences contain `a ASC`, |
1562 | 19 | // then we should add `a ASC, b ASC` to the ordering equivalences |
1563 | 19 | // of the join output. |
1564 | 19 | let out_oeq_class = left_oeq_class.join_suffix(&right_oeq_class); |
1565 | 19 | result.add_ordering_equivalence_class(out_oeq_class); |
1566 | 35 | } else { |
1567 | 35 | result.add_ordering_equivalence_class(left_oeq_class); |
1568 | 35 | } |
1569 | | } |
1570 | | [false, true] => { |
1571 | 400 | updated_right_ordering_equivalence_class( |
1572 | 400 | &mut right_oeq_class, |
1573 | 400 | join_type, |
1574 | 400 | left_size, |
1575 | 400 | ); |
1576 | 400 | // In this special case, left side ordering can be prefixed with |
1577 | 400 | // the right side ordering. |
1578 | 400 | if let (Some(JoinSide::Right), JoinType::Inner) = (probe_side, join_type) { |
1579 | 103 | // Left side ordering equivalence properties should be prepended |
1580 | 103 | // with those of the right side while constructing output ordering |
1581 | 103 | // equivalence properties since stream side is the right side. |
1582 | 103 | // |
1583 | 103 | // For example, if the left side ordering equivalences contain |
1584 | 103 | // `a ASC`, and the right side ordering equivalences contain `b ASC`, |
1585 | 103 | // then we should add `b ASC, a ASC` to the ordering equivalences |
1586 | 103 | // of the join output. |
1587 | 103 | let out_oeq_class = right_oeq_class.join_suffix(&left_oeq_class); |
1588 | 103 | result.add_ordering_equivalence_class(out_oeq_class); |
1589 | 297 | } else { |
1590 | 297 | result.add_ordering_equivalence_class(right_oeq_class); |
1591 | 297 | } |
1592 | | } |
1593 | 695 | [false, false] => {} |
1594 | 0 | [true, true] => unreachable!("Cannot maintain ordering of both sides"), |
1595 | 0 | _ => unreachable!("Join operators can not have more than two children"), |
1596 | | } |
1597 | 1.14k | match join_type { |
1598 | 268 | JoinType::LeftAnti | JoinType::LeftSemi => { |
1599 | 268 | result = result.with_constants(left_constants); |
1600 | 268 | } |
1601 | 264 | JoinType::RightAnti | JoinType::RightSemi => { |
1602 | 264 | result = result.with_constants(right_constants); |
1603 | 264 | } |
1604 | 617 | _ => {} |
1605 | | } |
1606 | 1.14k | result |
1607 | 1.14k | } |
1608 | | |
1609 | | /// In the context of a join, update the right side `OrderingEquivalenceClass` |
1610 | | /// so that they point to valid indices in the join output schema. |
1611 | | /// |
1612 | | /// To do so, we increment column indices by the size of the left table when |
1613 | | /// join schema consists of a combination of the left and right schemas. This |
1614 | | /// is the case for `Inner`, `Left`, `Full` and `Right` joins. For other cases, |
1615 | | /// indices do not change. |
1616 | 419 | fn updated_right_ordering_equivalence_class( |
1617 | 419 | right_oeq_class: &mut OrderingEquivalenceClass, |
1618 | 419 | join_type: &JoinType, |
1619 | 419 | left_size: usize, |
1620 | 419 | ) { |
1621 | 182 | if matches!( |
1622 | 419 | join_type, |
1623 | | JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right |
1624 | 237 | ) { |
1625 | 237 | right_oeq_class.add_offset(left_size); |
1626 | 237 | }182 |
1627 | 419 | } |
1628 | | |
1629 | | /// Wrapper struct for `Arc<dyn PhysicalExpr>` to use them as keys in a hash map. |
1630 | | #[derive(Debug, Clone)] |
1631 | | struct ExprWrapper(Arc<dyn PhysicalExpr>); |
1632 | | |
1633 | | impl PartialEq<Self> for ExprWrapper { |
1634 | 0 | fn eq(&self, other: &Self) -> bool { |
1635 | 0 | self.0.eq(&other.0) |
1636 | 0 | } |
1637 | | } |
1638 | | |
1639 | | impl Eq for ExprWrapper {} |
1640 | | |
1641 | | impl Hash for ExprWrapper { |
1642 | 1 | fn hash<H: Hasher>(&self, state: &mut H) { |
1643 | 1 | self.0.hash(state); |
1644 | 1 | } |
1645 | | } |
1646 | | |
1647 | | /// Calculates the union (in the sense of `UnionExec`) `EquivalenceProperties` |
1648 | | /// of `lhs` and `rhs` according to the schema of `lhs`. |
1649 | 5 | fn calculate_union_binary( |
1650 | 5 | lhs: EquivalenceProperties, |
1651 | 5 | mut rhs: EquivalenceProperties, |
1652 | 5 | ) -> Result<EquivalenceProperties> { |
1653 | 5 | // TODO: In some cases, we should be able to preserve some equivalence |
1654 | 5 | // classes. Add support for such cases. |
1655 | 5 | |
1656 | 5 | // Harmonize the schema of the rhs with the schema of the lhs (which is the accumulator schema): |
1657 | 5 | if !rhs.schema.eq(&lhs.schema) { |
1658 | 0 | rhs = rhs.with_new_schema(Arc::clone(&lhs.schema))?; |
1659 | 5 | } |
1660 | | |
1661 | | // First, calculate valid constants for the union. A quantity is constant |
1662 | | // after the union if it is constant in both sides. |
1663 | 5 | let constants = lhs |
1664 | 5 | .constants() |
1665 | 5 | .iter() |
1666 | 5 | .filter(|const_expr| const_exprs_contains(rhs.constants(), const_expr.expr())0 ) |
1667 | 5 | .map(|const_expr| { |
1668 | 0 | // TODO: When both sides' constants are valid across partitions, |
1669 | 0 | // the union's constant should also be valid if values are |
1670 | 0 | // the same. However, we do not have the capability to |
1671 | 0 | // check this yet. |
1672 | 0 | ConstExpr::new(Arc::clone(const_expr.expr())).with_across_partitions(false) |
1673 | 5 | }) |
1674 | 5 | .collect(); |
1675 | 5 | |
1676 | 5 | // Next, calculate valid orderings for the union by searching for prefixes |
1677 | 5 | // in both sides. |
1678 | 5 | let mut orderings = vec![]; |
1679 | 5 | for mut ordering4 in lhs.normalized_oeq_class().orderings { |
1680 | | // Progressively shorten the ordering to search for a satisfied prefix: |
1681 | 6 | while !rhs.ordering_satisfy(&ordering) { |
1682 | 2 | ordering.pop(); |
1683 | 2 | } |
1684 | | // There is a non-trivial satisfied prefix, add it as a valid ordering: |
1685 | 4 | if !ordering.is_empty() { |
1686 | 3 | orderings.push(ordering); |
1687 | 3 | }1 |
1688 | | } |
1689 | 5 | for mut ordering in rhs.normalized_oeq_class().orderings { |
1690 | | // Progressively shorten the ordering to search for a satisfied prefix: |
1691 | 8 | while !lhs.ordering_satisfy(&ordering) { |
1692 | 3 | ordering.pop(); |
1693 | 3 | } |
1694 | | // There is a non-trivial satisfied prefix, add it as a valid ordering: |
1695 | 5 | if !ordering.is_empty() { |
1696 | 4 | orderings.push(ordering); |
1697 | 4 | }1 |
1698 | | } |
1699 | 5 | let mut eq_properties = EquivalenceProperties::new(lhs.schema); |
1700 | 5 | eq_properties.constants = constants; |
1701 | 5 | eq_properties.add_new_orderings(orderings); |
1702 | 5 | Ok(eq_properties) |
1703 | 5 | } |
1704 | | |
1705 | | /// Calculates the union (in the sense of `UnionExec`) `EquivalenceProperties` |
1706 | | /// of the given `EquivalenceProperties` in `eqps` according to the given |
1707 | | /// output `schema` (which need not be the same with those of `lhs` and `rhs` |
1708 | | /// as details such as nullability may be different). |
1709 | 5 | pub fn calculate_union( |
1710 | 5 | eqps: Vec<EquivalenceProperties>, |
1711 | 5 | schema: SchemaRef, |
1712 | 5 | ) -> Result<EquivalenceProperties> { |
1713 | 5 | // TODO: In some cases, we should be able to preserve some equivalence |
1714 | 5 | // classes. Add support for such cases. |
1715 | 5 | let mut iter = eqps.into_iter(); |
1716 | 5 | let Some(mut acc) = iter.next() else { |
1717 | 0 | return internal_err!( |
1718 | 0 | "Cannot calculate EquivalenceProperties for a union with no inputs" |
1719 | 0 | ); |
1720 | | }; |
1721 | | |
1722 | | // Harmonize the schema of the init with the schema of the union: |
1723 | 5 | if !acc.schema.eq(&schema) { |
1724 | 0 | acc = acc.with_new_schema(schema)?; |
1725 | 5 | } |
1726 | | // Fold in the rest of the EquivalenceProperties: |
1727 | 10 | for props5 in iter { |
1728 | 5 | acc = calculate_union_binary(acc, props)?0 ; |
1729 | | } |
1730 | 5 | Ok(acc) |
1731 | 5 | } |
1732 | | |
1733 | | #[cfg(test)] |
1734 | | mod tests { |
1735 | | use std::ops::Not; |
1736 | | |
1737 | | use super::*; |
1738 | | use crate::equivalence::add_offset_to_expr; |
1739 | | use crate::equivalence::tests::{ |
1740 | | convert_to_orderings, convert_to_sort_exprs, convert_to_sort_reqs, |
1741 | | create_random_schema, create_test_params, create_test_schema, |
1742 | | generate_table_for_eq_properties, is_table_same_after_sort, output_schema, |
1743 | | }; |
1744 | | use crate::expressions::{col, BinaryExpr, Column}; |
1745 | | use crate::utils::tests::TestScalarUDF; |
1746 | | |
1747 | | use arrow::datatypes::{DataType, Field, Schema}; |
1748 | | use arrow_schema::{Fields, TimeUnit}; |
1749 | | use datafusion_common::DFSchema; |
1750 | | use datafusion_expr::{Operator, ScalarUDF}; |
1751 | | |
1752 | | #[test] |
1753 | | fn project_equivalence_properties_test() -> Result<()> { |
1754 | | let input_schema = Arc::new(Schema::new(vec![ |
1755 | | Field::new("a", DataType::Int64, true), |
1756 | | Field::new("b", DataType::Int64, true), |
1757 | | Field::new("c", DataType::Int64, true), |
1758 | | ])); |
1759 | | |
1760 | | let input_properties = EquivalenceProperties::new(Arc::clone(&input_schema)); |
1761 | | let col_a = col("a", &input_schema)?; |
1762 | | |
1763 | | // a as a1, a as a2, a as a3, a as a4 |
1764 | | let proj_exprs = vec![ |
1765 | | (Arc::clone(&col_a), "a1".to_string()), |
1766 | | (Arc::clone(&col_a), "a2".to_string()), |
1767 | | (Arc::clone(&col_a), "a3".to_string()), |
1768 | | (Arc::clone(&col_a), "a4".to_string()), |
1769 | | ]; |
1770 | | let projection_mapping = ProjectionMapping::try_new(&proj_exprs, &input_schema)?; |
1771 | | |
1772 | | let out_schema = output_schema(&projection_mapping, &input_schema)?; |
1773 | | // a as a1, a as a2, a as a3, a as a4 |
1774 | | let proj_exprs = vec![ |
1775 | | (Arc::clone(&col_a), "a1".to_string()), |
1776 | | (Arc::clone(&col_a), "a2".to_string()), |
1777 | | (Arc::clone(&col_a), "a3".to_string()), |
1778 | | (Arc::clone(&col_a), "a4".to_string()), |
1779 | | ]; |
1780 | | let projection_mapping = ProjectionMapping::try_new(&proj_exprs, &input_schema)?; |
1781 | | |
1782 | | // a as a1, a as a2, a as a3, a as a4 |
1783 | | let col_a1 = &col("a1", &out_schema)?; |
1784 | | let col_a2 = &col("a2", &out_schema)?; |
1785 | | let col_a3 = &col("a3", &out_schema)?; |
1786 | | let col_a4 = &col("a4", &out_schema)?; |
1787 | | let out_properties = input_properties.project(&projection_mapping, out_schema); |
1788 | | |
1789 | | // At the output a1=a2=a3=a4 |
1790 | | assert_eq!(out_properties.eq_group().len(), 1); |
1791 | | let eq_class = &out_properties.eq_group().classes[0]; |
1792 | | assert_eq!(eq_class.len(), 4); |
1793 | | assert!(eq_class.contains(col_a1)); |
1794 | | assert!(eq_class.contains(col_a2)); |
1795 | | assert!(eq_class.contains(col_a3)); |
1796 | | assert!(eq_class.contains(col_a4)); |
1797 | | |
1798 | | Ok(()) |
1799 | | } |
1800 | | |
1801 | | #[test] |
1802 | | fn project_equivalence_properties_test_multi() -> Result<()> { |
1803 | | // test multiple input orderings with equivalence properties |
1804 | | let input_schema = Arc::new(Schema::new(vec![ |
1805 | | Field::new("a", DataType::Int64, true), |
1806 | | Field::new("b", DataType::Int64, true), |
1807 | | Field::new("c", DataType::Int64, true), |
1808 | | Field::new("d", DataType::Int64, true), |
1809 | | ])); |
1810 | | |
1811 | | let mut input_properties = EquivalenceProperties::new(Arc::clone(&input_schema)); |
1812 | | // add equivalent ordering [a, b, c, d] |
1813 | | input_properties.add_new_ordering(vec![ |
1814 | | parse_sort_expr("a", &input_schema), |
1815 | | parse_sort_expr("b", &input_schema), |
1816 | | parse_sort_expr("c", &input_schema), |
1817 | | parse_sort_expr("d", &input_schema), |
1818 | | ]); |
1819 | | |
1820 | | // add equivalent ordering [a, c, b, d] |
1821 | | input_properties.add_new_ordering(vec![ |
1822 | | parse_sort_expr("a", &input_schema), |
1823 | | parse_sort_expr("c", &input_schema), |
1824 | | parse_sort_expr("b", &input_schema), // NB b and c are swapped |
1825 | | parse_sort_expr("d", &input_schema), |
1826 | | ]); |
1827 | | |
1828 | | // simply project all the columns in order |
1829 | | let proj_exprs = vec![ |
1830 | | (col("a", &input_schema)?, "a".to_string()), |
1831 | | (col("b", &input_schema)?, "b".to_string()), |
1832 | | (col("c", &input_schema)?, "c".to_string()), |
1833 | | (col("d", &input_schema)?, "d".to_string()), |
1834 | | ]; |
1835 | | let projection_mapping = ProjectionMapping::try_new(&proj_exprs, &input_schema)?; |
1836 | | let out_properties = input_properties.project(&projection_mapping, input_schema); |
1837 | | |
1838 | | assert_eq!( |
1839 | | out_properties.to_string(), |
1840 | | "order: [[a@0 ASC,c@2 ASC,b@1 ASC,d@3 ASC], [a@0 ASC,b@1 ASC,c@2 ASC,d@3 ASC]]" |
1841 | | ); |
1842 | | |
1843 | | Ok(()) |
1844 | | } |
1845 | | |
1846 | | #[test] |
1847 | | fn test_join_equivalence_properties() -> Result<()> { |
1848 | | let schema = create_test_schema()?; |
1849 | | let col_a = &col("a", &schema)?; |
1850 | | let col_b = &col("b", &schema)?; |
1851 | | let col_c = &col("c", &schema)?; |
1852 | | let offset = schema.fields.len(); |
1853 | | let col_a2 = &add_offset_to_expr(Arc::clone(col_a), offset); |
1854 | | let col_b2 = &add_offset_to_expr(Arc::clone(col_b), offset); |
1855 | | let option_asc = SortOptions { |
1856 | | descending: false, |
1857 | | nulls_first: false, |
1858 | | }; |
1859 | | let test_cases = vec![ |
1860 | | // ------- TEST CASE 1 -------- |
1861 | | // [a ASC], [b ASC] |
1862 | | ( |
1863 | | // [a ASC], [b ASC] |
1864 | | vec![vec![(col_a, option_asc)], vec![(col_b, option_asc)]], |
1865 | | // [a ASC], [b ASC] |
1866 | | vec![vec![(col_a, option_asc)], vec![(col_b, option_asc)]], |
1867 | | // expected [a ASC, a2 ASC], [a ASC, b2 ASC], [b ASC, a2 ASC], [b ASC, b2 ASC] |
1868 | | vec![ |
1869 | | vec![(col_a, option_asc), (col_a2, option_asc)], |
1870 | | vec![(col_a, option_asc), (col_b2, option_asc)], |
1871 | | vec![(col_b, option_asc), (col_a2, option_asc)], |
1872 | | vec![(col_b, option_asc), (col_b2, option_asc)], |
1873 | | ], |
1874 | | ), |
1875 | | // ------- TEST CASE 2 -------- |
1876 | | // [a ASC], [b ASC] |
1877 | | ( |
1878 | | // [a ASC], [b ASC], [c ASC] |
1879 | | vec![ |
1880 | | vec![(col_a, option_asc)], |
1881 | | vec![(col_b, option_asc)], |
1882 | | vec![(col_c, option_asc)], |
1883 | | ], |
1884 | | // [a ASC], [b ASC] |
1885 | | vec![vec![(col_a, option_asc)], vec![(col_b, option_asc)]], |
1886 | | // expected [a ASC, a2 ASC], [a ASC, b2 ASC], [b ASC, a2 ASC], [b ASC, b2 ASC], [c ASC, a2 ASC], [c ASC, b2 ASC] |
1887 | | vec![ |
1888 | | vec![(col_a, option_asc), (col_a2, option_asc)], |
1889 | | vec![(col_a, option_asc), (col_b2, option_asc)], |
1890 | | vec![(col_b, option_asc), (col_a2, option_asc)], |
1891 | | vec![(col_b, option_asc), (col_b2, option_asc)], |
1892 | | vec![(col_c, option_asc), (col_a2, option_asc)], |
1893 | | vec![(col_c, option_asc), (col_b2, option_asc)], |
1894 | | ], |
1895 | | ), |
1896 | | ]; |
1897 | | for (left_orderings, right_orderings, expected) in test_cases { |
1898 | | let mut left_eq_properties = EquivalenceProperties::new(Arc::clone(&schema)); |
1899 | | let mut right_eq_properties = EquivalenceProperties::new(Arc::clone(&schema)); |
1900 | | let left_orderings = convert_to_orderings(&left_orderings); |
1901 | | let right_orderings = convert_to_orderings(&right_orderings); |
1902 | | let expected = convert_to_orderings(&expected); |
1903 | | left_eq_properties.add_new_orderings(left_orderings); |
1904 | | right_eq_properties.add_new_orderings(right_orderings); |
1905 | | let join_eq = join_equivalence_properties( |
1906 | | left_eq_properties, |
1907 | | right_eq_properties, |
1908 | | &JoinType::Inner, |
1909 | | Arc::new(Schema::empty()), |
1910 | | &[true, false], |
1911 | | Some(JoinSide::Left), |
1912 | | &[], |
1913 | | ); |
1914 | | let orderings = &join_eq.oeq_class.orderings; |
1915 | | let err_msg = format!("expected: {:?}, actual:{:?}", expected, orderings); |
1916 | | assert_eq!( |
1917 | | join_eq.oeq_class.orderings.len(), |
1918 | | expected.len(), |
1919 | | "{}", |
1920 | | err_msg |
1921 | | ); |
1922 | | for ordering in orderings { |
1923 | | assert!( |
1924 | | expected.contains(ordering), |
1925 | | "{}, ordering: {:?}", |
1926 | | err_msg, |
1927 | | ordering |
1928 | | ); |
1929 | | } |
1930 | | } |
1931 | | Ok(()) |
1932 | | } |
1933 | | |
1934 | | #[test] |
1935 | | fn test_expr_consists_of_constants() -> Result<()> { |
1936 | | let schema = Arc::new(Schema::new(vec![ |
1937 | | Field::new("a", DataType::Int32, true), |
1938 | | Field::new("b", DataType::Int32, true), |
1939 | | Field::new("c", DataType::Int32, true), |
1940 | | Field::new("d", DataType::Int32, true), |
1941 | | Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true), |
1942 | | ])); |
1943 | | let col_a = col("a", &schema)?; |
1944 | | let col_b = col("b", &schema)?; |
1945 | | let col_d = col("d", &schema)?; |
1946 | | let b_plus_d = Arc::new(BinaryExpr::new( |
1947 | | Arc::clone(&col_b), |
1948 | | Operator::Plus, |
1949 | | Arc::clone(&col_d), |
1950 | | )) as Arc<dyn PhysicalExpr>; |
1951 | | |
1952 | | let constants = vec![Arc::clone(&col_a), Arc::clone(&col_b)]; |
1953 | | let expr = Arc::clone(&b_plus_d); |
1954 | | assert!(!is_constant_recurse(&constants, &expr)); |
1955 | | |
1956 | | let constants = vec![Arc::clone(&col_a), Arc::clone(&col_b), Arc::clone(&col_d)]; |
1957 | | let expr = Arc::clone(&b_plus_d); |
1958 | | assert!(is_constant_recurse(&constants, &expr)); |
1959 | | Ok(()) |
1960 | | } |
1961 | | |
1962 | | #[test] |
1963 | | fn test_get_updated_right_ordering_equivalence_properties() -> Result<()> { |
1964 | | let join_type = JoinType::Inner; |
1965 | | // Join right child schema |
1966 | | let child_fields: Fields = ["x", "y", "z", "w"] |
1967 | | .into_iter() |
1968 | | .map(|name| Field::new(name, DataType::Int32, true)) |
1969 | | .collect(); |
1970 | | let child_schema = Schema::new(child_fields); |
1971 | | let col_x = &col("x", &child_schema)?; |
1972 | | let col_y = &col("y", &child_schema)?; |
1973 | | let col_z = &col("z", &child_schema)?; |
1974 | | let col_w = &col("w", &child_schema)?; |
1975 | | let option_asc = SortOptions { |
1976 | | descending: false, |
1977 | | nulls_first: false, |
1978 | | }; |
1979 | | // [x ASC, y ASC], [z ASC, w ASC] |
1980 | | let orderings = vec![ |
1981 | | vec![(col_x, option_asc), (col_y, option_asc)], |
1982 | | vec![(col_z, option_asc), (col_w, option_asc)], |
1983 | | ]; |
1984 | | let orderings = convert_to_orderings(&orderings); |
1985 | | // Right child ordering equivalences |
1986 | | let mut right_oeq_class = OrderingEquivalenceClass::new(orderings); |
1987 | | |
1988 | | let left_columns_len = 4; |
1989 | | |
1990 | | let fields: Fields = ["a", "b", "c", "d", "x", "y", "z", "w"] |
1991 | | .into_iter() |
1992 | | .map(|name| Field::new(name, DataType::Int32, true)) |
1993 | | .collect(); |
1994 | | |
1995 | | // Join Schema |
1996 | | let schema = Schema::new(fields); |
1997 | | let col_a = &col("a", &schema)?; |
1998 | | let col_d = &col("d", &schema)?; |
1999 | | let col_x = &col("x", &schema)?; |
2000 | | let col_y = &col("y", &schema)?; |
2001 | | let col_z = &col("z", &schema)?; |
2002 | | let col_w = &col("w", &schema)?; |
2003 | | |
2004 | | let mut join_eq_properties = EquivalenceProperties::new(Arc::new(schema)); |
2005 | | // a=x and d=w |
2006 | | join_eq_properties.add_equal_conditions(col_a, col_x)?; |
2007 | | join_eq_properties.add_equal_conditions(col_d, col_w)?; |
2008 | | |
2009 | | updated_right_ordering_equivalence_class( |
2010 | | &mut right_oeq_class, |
2011 | | &join_type, |
2012 | | left_columns_len, |
2013 | | ); |
2014 | | join_eq_properties.add_ordering_equivalence_class(right_oeq_class); |
2015 | | let result = join_eq_properties.oeq_class().clone(); |
2016 | | |
2017 | | // [x ASC, y ASC], [z ASC, w ASC] |
2018 | | let orderings = vec![ |
2019 | | vec![(col_x, option_asc), (col_y, option_asc)], |
2020 | | vec![(col_z, option_asc), (col_w, option_asc)], |
2021 | | ]; |
2022 | | let orderings = convert_to_orderings(&orderings); |
2023 | | let expected = OrderingEquivalenceClass::new(orderings); |
2024 | | |
2025 | | assert_eq!(result, expected); |
2026 | | |
2027 | | Ok(()) |
2028 | | } |
2029 | | |
2030 | | #[test] |
2031 | | fn test_normalize_ordering_equivalence_classes() -> Result<()> { |
2032 | | let sort_options = SortOptions::default(); |
2033 | | |
2034 | | let schema = Schema::new(vec![ |
2035 | | Field::new("a", DataType::Int32, true), |
2036 | | Field::new("b", DataType::Int32, true), |
2037 | | Field::new("c", DataType::Int32, true), |
2038 | | ]); |
2039 | | let col_a_expr = col("a", &schema)?; |
2040 | | let col_b_expr = col("b", &schema)?; |
2041 | | let col_c_expr = col("c", &schema)?; |
2042 | | let mut eq_properties = EquivalenceProperties::new(Arc::new(schema.clone())); |
2043 | | |
2044 | | eq_properties.add_equal_conditions(&col_a_expr, &col_c_expr)?; |
2045 | | let others = vec![ |
2046 | | vec![PhysicalSortExpr { |
2047 | | expr: Arc::clone(&col_b_expr), |
2048 | | options: sort_options, |
2049 | | }], |
2050 | | vec![PhysicalSortExpr { |
2051 | | expr: Arc::clone(&col_c_expr), |
2052 | | options: sort_options, |
2053 | | }], |
2054 | | ]; |
2055 | | eq_properties.add_new_orderings(others); |
2056 | | |
2057 | | let mut expected_eqs = EquivalenceProperties::new(Arc::new(schema)); |
2058 | | expected_eqs.add_new_orderings([ |
2059 | | vec![PhysicalSortExpr { |
2060 | | expr: Arc::clone(&col_b_expr), |
2061 | | options: sort_options, |
2062 | | }], |
2063 | | vec![PhysicalSortExpr { |
2064 | | expr: Arc::clone(&col_c_expr), |
2065 | | options: sort_options, |
2066 | | }], |
2067 | | ]); |
2068 | | |
2069 | | let oeq_class = eq_properties.oeq_class().clone(); |
2070 | | let expected = expected_eqs.oeq_class(); |
2071 | | assert!(oeq_class.eq(expected)); |
2072 | | |
2073 | | Ok(()) |
2074 | | } |
2075 | | |
    #[test]
    fn test_get_indices_of_matching_sort_exprs_with_order_eq() -> Result<()> {
        let sort_options = SortOptions::default();
        let sort_options_not = SortOptions::default().not();

        // Case 1: ordering [b DESC-ish, a ASC] exactly matches the required
        // columns [b, a], so the full permutation is found.
        let schema = Schema::new(vec![
            Field::new("a", DataType::Int32, true),
            Field::new("b", DataType::Int32, true),
        ]);
        let col_a = &col("a", &schema)?;
        let col_b = &col("b", &schema)?;
        let required_columns = [Arc::clone(col_b), Arc::clone(col_a)];
        let mut eq_properties = EquivalenceProperties::new(Arc::new(schema));
        eq_properties.add_new_orderings([vec![
            PhysicalSortExpr {
                expr: Arc::new(Column::new("b", 1)),
                options: sort_options_not,
            },
            PhysicalSortExpr {
                expr: Arc::new(Column::new("a", 0)),
                options: sort_options,
            },
        ]]);
        let (result, idxs) = eq_properties.find_longest_permutation(&required_columns);
        assert_eq!(idxs, vec![0, 1]);
        assert_eq!(
            result,
            vec![
                PhysicalSortExpr {
                    expr: Arc::clone(col_b),
                    options: sort_options_not
                },
                PhysicalSortExpr {
                    expr: Arc::clone(col_a),
                    options: sort_options
                }
            ]
        );

        // Case 2: an extra, unrelated ordering ([c ASC]) does not prevent
        // matching [b, a] against the second ordering.
        let schema = Schema::new(vec![
            Field::new("a", DataType::Int32, true),
            Field::new("b", DataType::Int32, true),
            Field::new("c", DataType::Int32, true),
        ]);
        let col_a = &col("a", &schema)?;
        let col_b = &col("b", &schema)?;
        let required_columns = [Arc::clone(col_b), Arc::clone(col_a)];
        let mut eq_properties = EquivalenceProperties::new(Arc::new(schema));
        eq_properties.add_new_orderings([
            vec![PhysicalSortExpr {
                expr: Arc::new(Column::new("c", 2)),
                options: sort_options,
            }],
            vec![
                PhysicalSortExpr {
                    expr: Arc::new(Column::new("b", 1)),
                    options: sort_options_not,
                },
                PhysicalSortExpr {
                    expr: Arc::new(Column::new("a", 0)),
                    options: sort_options,
                },
            ],
        ]);
        let (result, idxs) = eq_properties.find_longest_permutation(&required_columns);
        assert_eq!(idxs, vec![0, 1]);
        assert_eq!(
            result,
            vec![
                PhysicalSortExpr {
                    expr: Arc::clone(col_b),
                    options: sort_options_not
                },
                PhysicalSortExpr {
                    expr: Arc::clone(col_a),
                    options: sort_options
                }
            ]
        );

        // Case 3: ordering [b, c, a] has `c` between the required columns,
        // so only the leading `b` can be matched (permutation length 1).
        let required_columns = [
            Arc::new(Column::new("b", 1)) as _,
            Arc::new(Column::new("a", 0)) as _,
        ];
        let schema = Schema::new(vec![
            Field::new("a", DataType::Int32, true),
            Field::new("b", DataType::Int32, true),
            Field::new("c", DataType::Int32, true),
        ]);
        let mut eq_properties = EquivalenceProperties::new(Arc::new(schema));

        // not satisfied orders
        eq_properties.add_new_orderings([vec![
            PhysicalSortExpr {
                expr: Arc::new(Column::new("b", 1)),
                options: sort_options_not,
            },
            PhysicalSortExpr {
                expr: Arc::new(Column::new("c", 2)),
                options: sort_options,
            },
            PhysicalSortExpr {
                expr: Arc::new(Column::new("a", 0)),
                options: sort_options,
            },
        ]]);
        let (_, idxs) = eq_properties.find_longest_permutation(&required_columns);
        assert_eq!(idxs, vec![0]);

        Ok(())
    }
2187 | | |
    #[test]
    // Verifies that `get_expr_properties` correctly infers the ordering of
    // derived expressions from known orderings ([b ASC], [d ASC]) and the
    // equivalence b = a.
    fn test_update_properties() -> Result<()> {
        let schema = Schema::new(vec![
            Field::new("a", DataType::Int32, true),
            Field::new("b", DataType::Int32, true),
            Field::new("c", DataType::Int32, true),
            Field::new("d", DataType::Int32, true),
        ]);

        let mut eq_properties = EquivalenceProperties::new(Arc::new(schema.clone()));
        let col_a = &col("a", &schema)?;
        let col_b = &col("b", &schema)?;
        let col_c = &col("c", &schema)?;
        let col_d = &col("d", &schema)?;
        let option_asc = SortOptions {
            descending: false,
            nulls_first: false,
        };
        // b=a (e.g they are aliases)
        eq_properties.add_equal_conditions(col_b, col_a)?;
        // [b ASC], [d ASC]
        eq_properties.add_new_orderings(vec![
            vec![PhysicalSortExpr {
                expr: Arc::clone(col_b),
                options: option_asc,
            }],
            vec![PhysicalSortExpr {
                expr: Arc::clone(col_d),
                options: option_asc,
            }],
        ]);

        // Each entry is (expression, expected sort properties).
        let test_cases = vec![
            // d + b: sum of two ASC-ordered columns is itself ASC-ordered
            (
                Arc::new(BinaryExpr::new(
                    Arc::clone(col_d),
                    Operator::Plus,
                    Arc::clone(col_b),
                )) as Arc<dyn PhysicalExpr>,
                SortProperties::Ordered(option_asc),
            ),
            // b
            (Arc::clone(col_b), SortProperties::Ordered(option_asc)),
            // a: ordered through the equivalence a = b
            (Arc::clone(col_a), SortProperties::Ordered(option_asc)),
            // a + c: c has no known ordering, so the sum is unordered
            (
                Arc::new(BinaryExpr::new(
                    Arc::clone(col_a),
                    Operator::Plus,
                    Arc::clone(col_c),
                )),
                SortProperties::Unordered,
            ),
        ];
        for (expr, expected) in test_cases {
            let leading_orderings = eq_properties
                .oeq_class()
                .iter()
                .flat_map(|ordering| ordering.first().cloned())
                .collect::<Vec<_>>();
            let expr_props = eq_properties.get_expr_properties(Arc::clone(&expr));
            let err_msg = format!(
                "expr:{:?}, expected: {:?}, actual: {:?}, leading_orderings: {leading_orderings:?}",
                expr, expected, expr_props.sort_properties
            );
            assert_eq!(expr_props.sort_properties, expected, "{}", err_msg);
        }

        Ok(())
    }
2260 | | |
    #[test]
    // Property-based test: for random schemas/properties and all expression
    // combinations, `find_longest_permutation` must return consistent
    // (ordering, indices) pairs, and sorting by the returned ordering must be
    // a no-op on data that already satisfies the properties.
    fn test_find_longest_permutation_random() -> Result<()> {
        const N_RANDOM_SCHEMA: usize = 100;
        const N_ELEMENTS: usize = 125;
        const N_DISTINCT: usize = 5;

        for seed in 0..N_RANDOM_SCHEMA {
            // Create a random schema with random properties
            let (test_schema, eq_properties) = create_random_schema(seed as u64)?;
            // Generate a data that satisfies properties given
            let table_data_with_properties =
                generate_table_for_eq_properties(&eq_properties, N_ELEMENTS, N_DISTINCT)?;

            let test_fun = ScalarUDF::new_from_impl(TestScalarUDF::new());
            let floor_a = crate::udf::create_physical_expr(
                &test_fun,
                &[col("a", &test_schema)?],
                &test_schema,
                &[],
                &DFSchema::empty(),
            )?;
            let a_plus_b = Arc::new(BinaryExpr::new(
                col("a", &test_schema)?,
                Operator::Plus,
                col("b", &test_schema)?,
            )) as Arc<dyn PhysicalExpr>;
            let exprs = [
                col("a", &test_schema)?,
                col("b", &test_schema)?,
                col("c", &test_schema)?,
                col("d", &test_schema)?,
                col("e", &test_schema)?,
                col("f", &test_schema)?,
                floor_a,
                a_plus_b,
            ];

            // Exercise every combination of the expressions above, of every size.
            for n_req in 0..=exprs.len() {
                for exprs in exprs.iter().combinations(n_req) {
                    let exprs = exprs.into_iter().cloned().collect::<Vec<_>>();
                    let (ordering, indices) =
                        eq_properties.find_longest_permutation(&exprs);
                    // Make sure that find_longest_permutation return values are consistent
                    let ordering2 = indices
                        .iter()
                        .zip(ordering.iter())
                        .map(|(&idx, sort_expr)| PhysicalSortExpr {
                            expr: Arc::clone(&exprs[idx]),
                            options: sort_expr.options,
                        })
                        .collect::<Vec<_>>();
                    assert_eq!(
                        ordering, ordering2,
                        "indices and lexicographical ordering do not match"
                    );

                    let err_msg = format!(
                        "Error in test case ordering:{:?}, eq_properties.oeq_class: {:?}, eq_properties.eq_group: {:?}, eq_properties.constants: {:?}",
                        ordering, eq_properties.oeq_class, eq_properties.eq_group, eq_properties.constants
                    );
                    assert_eq!(ordering.len(), indices.len(), "{}", err_msg);
                    // Since ordered section satisfies schema, we expect
                    // that result will be same after sort (e.g sort was unnecessary).
                    assert!(
                        is_table_same_after_sort(
                            ordering.clone(),
                            table_data_with_properties.clone(),
                        )?,
                        "{}",
                        err_msg
                    );
                }
            }
        }

        Ok(())
    }
    #[test]
    fn test_find_longest_permutation() -> Result<()> {
        // Schema satisfies following orderings:
        // [a ASC], [d ASC, b ASC], [e DESC, f ASC, g ASC]
        // and
        // Column [a=c] (e.g they are aliases).
        // At below we add [d ASC, h DESC] also, for test purposes
        let (test_schema, mut eq_properties) = create_test_params()?;
        let col_a = &col("a", &test_schema)?;
        let col_b = &col("b", &test_schema)?;
        let col_c = &col("c", &test_schema)?;
        let col_d = &col("d", &test_schema)?;
        let col_e = &col("e", &test_schema)?;
        let col_f = &col("f", &test_schema)?;
        let col_h = &col("h", &test_schema)?;
        // a + d
        let a_plus_d = Arc::new(BinaryExpr::new(
            Arc::clone(col_a),
            Operator::Plus,
            Arc::clone(col_d),
        )) as Arc<dyn PhysicalExpr>;

        let option_asc = SortOptions {
            descending: false,
            nulls_first: false,
        };
        let option_desc = SortOptions {
            descending: true,
            nulls_first: true,
        };
        // [d ASC, h DESC] also satisfies schema.
        eq_properties.add_new_orderings([vec![
            PhysicalSortExpr {
                expr: Arc::clone(col_d),
                options: option_asc,
            },
            PhysicalSortExpr {
                expr: Arc::clone(col_h),
                options: option_desc,
            },
        ]]);
        // Each entry is (input expressions, expected longest ordered permutation).
        let test_cases = vec![
            // TEST CASE 1
            (vec![col_a], vec![(col_a, option_asc)]),
            // TEST CASE 2
            (vec![col_c], vec![(col_c, option_asc)]),
            // TEST CASE 3
            (
                vec![col_d, col_e, col_b],
                vec![
                    (col_d, option_asc),
                    (col_e, option_desc),
                    (col_b, option_asc),
                ],
            ),
            // TEST CASE 4: b alone has no known ordering
            (vec![col_b], vec![]),
            // TEST CASE 5
            (vec![col_d], vec![(col_d, option_asc)]),
            // TEST CASE 6
            (vec![&a_plus_d], vec![(&a_plus_d, option_asc)]),
            // TEST CASE 7
            (
                vec![col_b, col_d],
                vec![(col_d, option_asc), (col_b, option_asc)],
            ),
            // TEST CASE 8
            (
                vec![col_c, col_e],
                vec![(col_c, option_asc), (col_e, option_desc)],
            ),
            // TEST CASE 9
            (
                vec![col_d, col_h, col_e, col_f, col_b],
                vec![
                    (col_d, option_asc),
                    (col_e, option_desc),
                    (col_h, option_desc),
                    (col_f, option_asc),
                    (col_b, option_asc),
                ],
            ),
            // TEST CASE 10
            (
                vec![col_e, col_d, col_h, col_f, col_b],
                vec![
                    (col_e, option_desc),
                    (col_d, option_asc),
                    (col_h, option_desc),
                    (col_f, option_asc),
                    (col_b, option_asc),
                ],
            ),
            // TEST CASE 11
            (
                vec![col_e, col_d, col_b, col_h, col_f],
                vec![
                    (col_e, option_desc),
                    (col_d, option_asc),
                    (col_b, option_asc),
                    (col_h, option_desc),
                    (col_f, option_asc),
                ],
            ),
        ];
        for (exprs, expected) in test_cases {
            let exprs = exprs.into_iter().cloned().collect::<Vec<_>>();
            let expected = convert_to_sort_exprs(&expected);
            let (actual, _) = eq_properties.find_longest_permutation(&exprs);
            assert_eq!(actual, expected);
        }

        Ok(())
    }
2452 | | |
    #[test]
    // Verifies that constant expressions participate in
    // `find_longest_permutation` with the default sort options.
    fn test_find_longest_permutation2() -> Result<()> {
        // Schema satisfies following orderings:
        // [a ASC], [d ASC, b ASC], [e DESC, f ASC, g ASC]
        // and
        // Column [a=c] (e.g they are aliases).
        // At below we add [d ASC, h DESC] also, for test purposes
        let (test_schema, mut eq_properties) = create_test_params()?;
        let col_h = &col("h", &test_schema)?;

        // Add column h as constant
        eq_properties = eq_properties.with_constants(vec![ConstExpr::from(col_h)]);

        let test_cases = vec![
            // TEST CASE 1
            // ordering of the constants are treated as default ordering.
            // This is the convention currently used.
            (vec![col_h], vec![(col_h, SortOptions::default())]),
        ];
        for (exprs, expected) in test_cases {
            let exprs = exprs.into_iter().cloned().collect::<Vec<_>>();
            let expected = convert_to_sort_exprs(&expected);
            let (actual, _) = eq_properties.find_longest_permutation(&exprs);
            assert_eq!(actual, expected);
        }

        Ok(())
    }
2481 | | |
    #[test]
    fn test_get_finer() -> Result<()> {
        let schema = create_test_schema()?;
        let col_a = &col("a", &schema)?;
        let col_b = &col("b", &schema)?;
        let col_c = &col("c", &schema)?;
        let eq_properties = EquivalenceProperties::new(schema);
        let option_asc = SortOptions {
            descending: false,
            nulls_first: false,
        };
        let option_desc = SortOptions {
            descending: true,
            nulls_first: true,
        };
        // First and second entries are the physical sort requirements passed to
        // `get_finer_requirement`; the third entry is the expected result
        // (`None` when the two requirements are incompatible).
        let tests_cases = vec![
            // Get finer requirement between [a Some(ASC)] and [a None, b Some(ASC)]
            // result should be [a Some(ASC), b Some(ASC)]
            (
                vec![(col_a, Some(option_asc))],
                vec![(col_a, None), (col_b, Some(option_asc))],
                Some(vec![(col_a, Some(option_asc)), (col_b, Some(option_asc))]),
            ),
            // Get finer requirement between [a Some(ASC), b Some(ASC), c Some(ASC)] and [a Some(ASC), b Some(ASC)]
            // result should be [a Some(ASC), b Some(ASC), c Some(ASC)]
            (
                vec![
                    (col_a, Some(option_asc)),
                    (col_b, Some(option_asc)),
                    (col_c, Some(option_asc)),
                ],
                vec![(col_a, Some(option_asc)), (col_b, Some(option_asc))],
                Some(vec![
                    (col_a, Some(option_asc)),
                    (col_b, Some(option_asc)),
                    (col_c, Some(option_asc)),
                ]),
            ),
            // Get finer requirement between [a Some(ASC), b Some(ASC)] and [a Some(ASC), b Some(DESC)]
            // result should be None
            (
                vec![(col_a, Some(option_asc)), (col_b, Some(option_asc))],
                vec![(col_a, Some(option_asc)), (col_b, Some(option_desc))],
                None,
            ),
        ];
        for (lhs, rhs, expected) in tests_cases {
            let lhs = convert_to_sort_reqs(&lhs);
            let rhs = convert_to_sort_reqs(&rhs);
            let expected = expected.map(|expected| convert_to_sort_reqs(&expected));
            let finer = eq_properties.get_finer_requirement(&lhs, &rhs);
            assert_eq!(finer, expected)
        }

        Ok(())
    }
2540 | | |
    #[test]
    fn test_normalize_sort_reqs() -> Result<()> {
        // Schema satisfies following properties
        // a=c
        // and following orderings are valid
        // [a ASC], [d ASC, b ASC], [e DESC, f ASC, g ASC]
        let (test_schema, eq_properties) = create_test_params()?;
        let col_a = &col("a", &test_schema)?;
        let col_b = &col("b", &test_schema)?;
        let col_c = &col("c", &test_schema)?;
        let col_d = &col("d", &test_schema)?;
        let col_e = &col("e", &test_schema)?;
        let col_f = &col("f", &test_schema)?;
        let option_asc = SortOptions {
            descending: false,
            nulls_first: false,
        };
        let option_desc = SortOptions {
            descending: true,
            nulls_first: true,
        };
        // First element in the tuple is the input requirement, second element
        // is the expected normalized requirement.
        let requirements = vec![
            (
                vec![(col_a, Some(option_asc))],
                vec![(col_a, Some(option_asc))],
            ),
            (
                vec![(col_a, Some(option_desc))],
                vec![(col_a, Some(option_desc))],
            ),
            (vec![(col_a, None)], vec![(col_a, None)]),
            // Test whether equivalence works as expected
            (
                vec![(col_c, Some(option_asc))],
                vec![(col_a, Some(option_asc))],
            ),
            (vec![(col_c, None)], vec![(col_a, None)]),
            // Test whether ordering equivalence works as expected
            (
                vec![(col_d, Some(option_asc)), (col_b, Some(option_asc))],
                vec![(col_d, Some(option_asc)), (col_b, Some(option_asc))],
            ),
            (
                vec![(col_d, None), (col_b, None)],
                vec![(col_d, None), (col_b, None)],
            ),
            (
                vec![(col_e, Some(option_desc)), (col_f, Some(option_asc))],
                vec![(col_e, Some(option_desc)), (col_f, Some(option_asc))],
            ),
            // We should be able to normalize in compatible requirements also (not exactly equal)
            (
                vec![(col_e, Some(option_desc)), (col_f, None)],
                vec![(col_e, Some(option_desc)), (col_f, None)],
            ),
            (
                vec![(col_e, None), (col_f, None)],
                vec![(col_e, None), (col_f, None)],
            ),
        ];

        for (reqs, expected_normalized) in requirements.into_iter() {
            let req = convert_to_sort_reqs(&reqs);
            let expected_normalized = convert_to_sort_reqs(&expected_normalized);

            assert_eq!(
                eq_properties.normalize_sort_requirements(&req),
                expected_normalized
            );
        }

        Ok(())
    }
2615 | | |
    #[test]
    fn test_schema_normalize_sort_requirement_with_equivalence() -> Result<()> {
        let option1 = SortOptions {
            descending: false,
            nulls_first: false,
        };
        // Assume that column a and c are aliases.
        let (test_schema, eq_properties) = create_test_params()?;
        let col_a = &col("a", &test_schema)?;
        let col_c = &col("c", &test_schema)?;
        let col_d = &col("d", &test_schema)?;

        // Test cases for equivalence normalization
        // First entry in the tuple is PhysicalSortRequirement, second entry in the tuple is
        // expected PhysicalSortRequirement after normalization.
        let test_cases = vec![
            (vec![(col_a, Some(option1))], vec![(col_a, Some(option1))]),
            // In the normalized version column c should be replaced with column a
            (vec![(col_c, Some(option1))], vec![(col_a, Some(option1))]),
            (vec![(col_c, None)], vec![(col_a, None)]),
            // d has no equivalent column, so it remains unchanged
            (vec![(col_d, Some(option1))], vec![(col_d, Some(option1))]),
        ];
        for (reqs, expected) in test_cases.into_iter() {
            let reqs = convert_to_sort_reqs(&reqs);
            let expected = convert_to_sort_reqs(&expected);

            let normalized = eq_properties.normalize_sort_requirements(&reqs);
            assert!(
                expected.eq(&normalized),
                "error in test: reqs: {reqs:?}, expected: {expected:?}, normalized: {normalized:?}"
            );
        }

        Ok(())
    }
2651 | | |
    #[test]
    // Verifies that `ordering_satisfy` recognizes redundant sorts when a
    // monotonic cast (CAST(c AS Date32)) is equivalent to an ordered column,
    // and that the result does not depend on the order in which constants and
    // equal conditions are registered.
    fn test_eliminate_redundant_monotonic_sorts() -> Result<()> {
        let schema = Arc::new(Schema::new(vec![
            Field::new("a", DataType::Date32, true),
            Field::new("b", DataType::Utf8, true),
            Field::new("c", DataType::Timestamp(TimeUnit::Nanosecond, None), true),
        ]));
        // Base ordering: [a ASC, b ASC, c ASC] (nulls first).
        let base_properties = EquivalenceProperties::new(Arc::clone(&schema))
            .with_reorder(
                ["a", "b", "c"]
                    .into_iter()
                    .map(|c| {
                        col(c, schema.as_ref()).map(|expr| PhysicalSortExpr {
                            expr,
                            options: SortOptions {
                                descending: false,
                                nulls_first: true,
                            },
                        })
                    })
                    .collect::<Result<Vec<_>>>()?,
            );

        struct TestCase {
            name: &'static str,
            // Expressions registered as constants before the check
            constants: Vec<Arc<dyn PhysicalExpr>>,
            // Pairs registered as equalities before the check
            equal_conditions: Vec<[Arc<dyn PhysicalExpr>; 2]>,
            // Columns of the sort whose satisfaction is checked
            sort_columns: &'static [&'static str],
            should_satisfy_ordering: bool,
        }

        let col_a = col("a", schema.as_ref())?;
        let col_b = col("b", schema.as_ref())?;
        let col_c = col("c", schema.as_ref())?;
        let cast_c = Arc::new(CastExpr::new(col_c, DataType::Date32, None));

        let cases = vec![
            TestCase {
                name: "(a, b, c) -> (c)",
                // b is constant, so it should be removed from the sort order
                constants: vec![Arc::clone(&col_b)],
                equal_conditions: vec![[
                    Arc::clone(&cast_c) as Arc<dyn PhysicalExpr>,
                    Arc::clone(&col_a),
                ]],
                sort_columns: &["c"],
                should_satisfy_ordering: true,
            },
            // Same test with above test, where equality order is swapped.
            // Algorithm shouldn't depend on this order.
            TestCase {
                name: "(a, b, c) -> (c)",
                // b is constant, so it should be removed from the sort order
                constants: vec![col_b],
                equal_conditions: vec![[
                    Arc::clone(&col_a),
                    Arc::clone(&cast_c) as Arc<dyn PhysicalExpr>,
                ]],
                sort_columns: &["c"],
                should_satisfy_ordering: true,
            },
            TestCase {
                name: "not ordered because (b) is not constant",
                // b is not constant anymore
                constants: vec![],
                // a and c are still compatible, but this is irrelevant since the original ordering is (a, b, c)
                equal_conditions: vec![[
                    Arc::clone(&cast_c) as Arc<dyn PhysicalExpr>,
                    Arc::clone(&col_a),
                ]],
                sort_columns: &["c"],
                should_satisfy_ordering: false,
            },
        ];

        for case in cases {
            // Construct the equivalence properties in different orders
            // to exercise different code paths
            // (The resulting properties _should_ be the same)
            for properties in [
                // Equal conditions before constants
                {
                    let mut properties = base_properties.clone();
                    for [left, right] in &case.equal_conditions {
                        properties.add_equal_conditions(left, right)?
                    }
                    properties.with_constants(
                        case.constants.iter().cloned().map(ConstExpr::from),
                    )
                },
                // Constants before equal conditions
                {
                    let mut properties = base_properties.clone().with_constants(
                        case.constants.iter().cloned().map(ConstExpr::from),
                    );
                    for [left, right] in &case.equal_conditions {
                        properties.add_equal_conditions(left, right)?
                    }
                    properties
                },
            ] {
                let sort = case
                    .sort_columns
                    .iter()
                    .map(|&name| {
                        col(name, &schema).map(|col| PhysicalSortExpr {
                            expr: col,
                            options: SortOptions::default(),
                        })
                    })
                    .collect::<Result<Vec<_>>>()?;

                assert_eq!(
                    properties.ordering_satisfy(&sort),
                    case.should_satisfy_ordering,
                    "failed test '{}'",
                    case.name
                );
            }
        }

        Ok(())
    }
2775 | | |
2776 | | /// Return a new schema with the same types, but new field names |
2777 | | /// |
2778 | | /// The new field names are the old field names with `text` appended. |
2779 | | /// |
2780 | | /// For example, the schema "a", "b", "c" becomes "a1", "b1", "c1" |
2781 | | /// if `text` is "1". |
2782 | | fn append_fields(schema: &SchemaRef, text: &str) -> SchemaRef { |
2783 | | Arc::new(Schema::new( |
2784 | | schema |
2785 | | .fields() |
2786 | | .iter() |
2787 | | .map(|field| { |
2788 | | Field::new( |
2789 | | // Annotate name with `text`: |
2790 | | format!("{}{}", field.name(), text), |
2791 | | field.data_type().clone(), |
2792 | | field.is_nullable(), |
2793 | | ) |
2794 | | }) |
2795 | | .collect::<Vec<_>>(), |
2796 | | )) |
2797 | | } |
2798 | | |
2799 | | #[test] |
2800 | | fn test_union_equivalence_properties_multi_children_1() { |
2801 | | let schema = create_test_schema().unwrap(); |
2802 | | let schema2 = append_fields(&schema, "1"); |
2803 | | let schema3 = append_fields(&schema, "2"); |
2804 | | UnionEquivalenceTest::new(&schema) |
2805 | | // Children 1 |
2806 | | .with_child_sort(vec![vec!["a", "b", "c"]], &schema) |
2807 | | // Children 2 |
2808 | | .with_child_sort(vec![vec!["a1", "b1", "c1"]], &schema2) |
2809 | | // Children 3 |
2810 | | .with_child_sort(vec![vec!["a2", "b2"]], &schema3) |
2811 | | .with_expected_sort(vec![vec!["a", "b"]]) |
2812 | | .run() |
2813 | | } |
2814 | | |
2815 | | #[test] |
2816 | | fn test_union_equivalence_properties_multi_children_2() { |
2817 | | let schema = create_test_schema().unwrap(); |
2818 | | let schema2 = append_fields(&schema, "1"); |
2819 | | let schema3 = append_fields(&schema, "2"); |
2820 | | UnionEquivalenceTest::new(&schema) |
2821 | | // Children 1 |
2822 | | .with_child_sort(vec![vec!["a", "b", "c"]], &schema) |
2823 | | // Children 2 |
2824 | | .with_child_sort(vec![vec!["a1", "b1", "c1"]], &schema2) |
2825 | | // Children 3 |
2826 | | .with_child_sort(vec![vec!["a2", "b2", "c2"]], &schema3) |
2827 | | .with_expected_sort(vec![vec!["a", "b", "c"]]) |
2828 | | .run() |
2829 | | } |
2830 | | |
2831 | | #[test] |
2832 | | fn test_union_equivalence_properties_multi_children_3() { |
2833 | | let schema = create_test_schema().unwrap(); |
2834 | | let schema2 = append_fields(&schema, "1"); |
2835 | | let schema3 = append_fields(&schema, "2"); |
2836 | | UnionEquivalenceTest::new(&schema) |
2837 | | // Children 1 |
2838 | | .with_child_sort(vec![vec!["a", "b"]], &schema) |
2839 | | // Children 2 |
2840 | | .with_child_sort(vec![vec!["a1", "b1", "c1"]], &schema2) |
2841 | | // Children 3 |
2842 | | .with_child_sort(vec![vec!["a2", "b2", "c2"]], &schema3) |
2843 | | .with_expected_sort(vec![vec!["a", "b"]]) |
2844 | | .run() |
2845 | | } |
2846 | | |
2847 | | #[test] |
2848 | | fn test_union_equivalence_properties_multi_children_4() { |
2849 | | let schema = create_test_schema().unwrap(); |
2850 | | let schema2 = append_fields(&schema, "1"); |
2851 | | let schema3 = append_fields(&schema, "2"); |
2852 | | UnionEquivalenceTest::new(&schema) |
2853 | | // Children 1 |
2854 | | .with_child_sort(vec![vec!["a", "b"]], &schema) |
2855 | | // Children 2 |
2856 | | .with_child_sort(vec![vec!["a1", "b1"]], &schema2) |
2857 | | // Children 3 |
2858 | | .with_child_sort(vec![vec!["b2", "c2"]], &schema3) |
2859 | | .with_expected_sort(vec![]) |
2860 | | .run() |
2861 | | } |
2862 | | |
2863 | | #[test] |
2864 | | fn test_union_equivalence_properties_multi_children_5() { |
2865 | | let schema = create_test_schema().unwrap(); |
2866 | | let schema2 = append_fields(&schema, "1"); |
2867 | | UnionEquivalenceTest::new(&schema) |
2868 | | // Children 1 |
2869 | | .with_child_sort(vec![vec!["a", "b"], vec!["c"]], &schema) |
2870 | | // Children 2 |
2871 | | .with_child_sort(vec![vec!["a1", "b1"], vec!["c1"]], &schema2) |
2872 | | .with_expected_sort(vec![vec!["a", "b"], vec!["c"]]) |
2873 | | .run() |
2874 | | } |
2875 | | |
    #[test]
    // A constant common to all children ([c]) survives the union; constants
    // that are only constant in one child ([a], [b]) do not.
    fn test_union_equivalence_properties_constants_1() {
        let schema = create_test_schema().unwrap();
        UnionEquivalenceTest::new(&schema)
            .with_child_sort_and_const_exprs(
                // First child: [a ASC], const [b, c]
                vec![vec!["a"]],
                vec!["b", "c"],
                &schema,
            )
            .with_child_sort_and_const_exprs(
                // Second child: [b ASC], const [a, c]
                vec![vec!["b"]],
                vec!["a", "c"],
                &schema,
            )
            .with_expected_sort_and_const_exprs(
                // Union expected orderings: [[a ASC], [b ASC]], const [c]
                vec![vec!["a"], vec!["b"]],
                vec!["c"],
            )
            .run()
    }
2899 | | |
2900 | | #[test] |
2901 | | fn test_union_equivalence_properties_constants_2() { |
2902 | | let schema = create_test_schema().unwrap(); |
2903 | | UnionEquivalenceTest::new(&schema) |
2904 | | // Meet ordering between [a ASC], [a ASC, b ASC] should be [a ASC] |
2905 | | .with_child_sort_and_const_exprs( |
2906 | | // First child: [a ASC], const [] |
2907 | | vec![vec!["a"]], |
2908 | | vec![], |
2909 | | &schema, |
2910 | | ) |
2911 | | .with_child_sort_and_const_exprs( |
2912 | | // Second child: [a ASC, b ASC], const [] |
2913 | | vec![vec!["a", "b"]], |
2914 | | vec![], |
2915 | | &schema, |
2916 | | ) |
2917 | | .with_expected_sort_and_const_exprs( |
2918 | | // Union orderings: [a ASC], const [] |
2919 | | vec![vec!["a"]], |
2920 | | vec![], |
2921 | | ) |
2922 | | .run() |
2923 | | } |
2924 | | |
2925 | | #[test] |
2926 | | fn test_union_equivalence_properties_constants_3() { |
2927 | | let schema = create_test_schema().unwrap(); |
2928 | | UnionEquivalenceTest::new(&schema) |
2929 | | // Meet ordering between [a ASC], [a DESC] should be [] |
2930 | | .with_child_sort_and_const_exprs( |
2931 | | // First child: [a ASC], const [] |
2932 | | vec![vec!["a"]], |
2933 | | vec![], |
2934 | | &schema, |
2935 | | ) |
2936 | | .with_child_sort_and_const_exprs( |
2937 | | // Second child orderings: [a DESC], const [] |
2938 | | vec![vec!["a DESC"]], |
2939 | | vec![], |
2940 | | &schema, |
2941 | | ) |
2942 | | .with_expected_sort_and_const_exprs( |
2943 | | // Union doesn't have any ordering or constant |
2944 | | vec![], |
2945 | | vec![], |
2946 | | ) |
2947 | | .run() |
2948 | | } |
2949 | | |
    #[test]
    fn test_union_equivalence_properties_constants_4() {
        let schema = create_test_schema().unwrap();
        let schema2 = append_fields(&schema, "1");
        UnionEquivalenceTest::new(&schema)
            .with_child_sort_and_const_exprs(
                // First child orderings: [a ASC], const []
                vec![vec!["a"]],
                vec![],
                &schema,
            )
            .with_child_sort_and_const_exprs(
                // Second child orderings: [a1 ASC, b1 ASC], const []
                vec![vec!["a1", "b1"]],
                vec![],
                &schema2,
            )
            .with_expected_sort_and_const_exprs(
                // Union orderings:
                // should be [a ASC]
                //
                // Where a and a1 are at the same index in their corresponding
                // schemas.
                vec![vec!["a"]],
                vec![],
            )
            .run()
    }
2978 | | |
#[test]
    #[ignore]
    // ignored due to https://github.com/apache/datafusion/issues/12446
    // Each child's constant expression should be interleaved into the other
    // child's ordering when forming the union orderings.
    fn test_union_equivalence_properties_constants() {
        let schema = create_test_schema().unwrap();
        UnionEquivalenceTest::new(&schema)
            .with_child_sort_and_const_exprs(
                // First child orderings: [a ASC, c ASC], const [b]
                vec![vec!["a", "c"]],
                vec!["b"],
                &schema,
            )
            .with_child_sort_and_const_exprs(
                // Second child orderings: [b ASC, c ASC], const [a]
                vec![vec!["b", "c"]],
                vec!["a"],
                &schema,
            )
            .with_expected_sort_and_const_exprs(
                // Union orderings: [
                //   [a ASC, b ASC, c ASC],
                //   [b ASC, a ASC, c ASC]
                // ], const []
                vec![vec!["a", "b", "c"], vec!["b", "a", "c"]],
                vec![],
            )
            .run()
    }
3007 | | |
    #[test]
    #[ignore]
    // ignored due to https://github.com/apache/datafusion/issues/12446
    fn test_union_equivalence_properties_constants_desc() {
        let schema = create_test_schema().unwrap();
        UnionEquivalenceTest::new(&schema)
            .with_child_sort_and_const_exprs(
                // NB `b DESC` in the second child
                // First child orderings: [a ASC, c ASC], const [b]
                vec![vec!["a", "c"]],
                vec!["b"],
                &schema,
            )
            .with_child_sort_and_const_exprs(
                // Second child orderings: [b DESC, c ASC], const [a]
                vec![vec!["b DESC", "c"]],
                vec!["a"],
                &schema,
            )
            .with_expected_sort_and_const_exprs(
                // Union orderings: [
                //   [a ASC, b DESC, c ASC],
                //   [b DESC, a ASC, c ASC]
                // ], const []
                vec![vec!["a", "b DESC", "c"], vec!["b DESC", "a", "c"]],
                vec![],
            )
            .run()
    }
3037 | | |
3038 | | #[test] |
3039 | | #[ignore] |
3040 | | // ignored due to https://github.com/apache/datafusion/issues/12446 |
3041 | | fn test_union_equivalence_properties_constants_middle() { |
3042 | | let schema = create_test_schema().unwrap(); |
3043 | | UnionEquivalenceTest::new(&schema) |
3044 | | .with_child_sort_and_const_exprs( |
3045 | | // First child: [a ASC, b ASC, d ASC], const [c] |
3046 | | vec![vec!["a", "b", "d"]], |
3047 | | vec!["c"], |
3048 | | &schema, |
3049 | | ) |
3050 | | .with_child_sort_and_const_exprs( |
3051 | | // Second child: [a ASC, c ASC, d ASC], const [b] |
3052 | | vec![vec!["a", "c", "d"]], |
3053 | | vec!["b"], |
3054 | | &schema, |
3055 | | ) |
3056 | | .with_expected_sort_and_const_exprs( |
3057 | | // Union orderings: |
3058 | | // [a, b, d] (c constant) |
3059 | | // [a, c, d] (b constant) |
3060 | | vec![vec!["a", "c", "b", "d"], vec!["a", "b", "c", "d"]], |
3061 | | vec![], |
3062 | | ) |
3063 | | .run() |
3064 | | } |
3065 | | |
    #[test]
    #[ignore]
    // ignored due to https://github.com/apache/datafusion/issues/12446
    fn test_union_equivalence_properties_constants_middle_desc() {
        let schema = create_test_schema().unwrap();
        UnionEquivalenceTest::new(&schema)
            .with_child_sort_and_const_exprs(
                // NB `b DESC` in the first child
                //
                // First child: [a ASC, b DESC, d ASC], const [c]
                vec![vec!["a", "b DESC", "d"]],
                vec!["c"],
                &schema,
            )
            .with_child_sort_and_const_exprs(
                // Second child: [a ASC, c ASC, d ASC], const [b]
                vec![vec!["a", "c", "d"]],
                vec!["b"],
                &schema,
            )
            .with_expected_sort_and_const_exprs(
                // Union orderings:
                //   [a, b DESC, d] (c constant)
                //   [a, c, d] (b constant)
                vec![vec!["a", "c", "b DESC", "d"], vec!["a", "b DESC", "c", "d"]],
                vec![],
            )
            .run()
    }
3095 | | |
3096 | | // TODO tests with multiple constants |
3097 | | |
    /// Fluent builder for testing the equivalence properties produced by a
    /// union of several children (exercised via `run`, which calls
    /// `calculate_union`).
    #[derive(Debug)]
    struct UnionEquivalenceTest {
        /// The schema of the output of the Union
        output_schema: SchemaRef,
        /// The equivalence properties of each child to the union
        child_properties: Vec<EquivalenceProperties>,
        /// The expected output properties of the union. Must be set before
        /// running `run`
        expected_properties: Option<EquivalenceProperties>,
    }
3108 | | |
3109 | | impl UnionEquivalenceTest { |
3110 | | fn new(output_schema: &SchemaRef) -> Self { |
3111 | | Self { |
3112 | | output_schema: Arc::clone(output_schema), |
3113 | | child_properties: vec![], |
3114 | | expected_properties: None, |
3115 | | } |
3116 | | } |
3117 | | |
3118 | | /// Add a union input with the specified orderings |
3119 | | /// |
3120 | | /// See [`Self::make_props`] for the format of the strings in `orderings` |
3121 | | fn with_child_sort( |
3122 | | mut self, |
3123 | | orderings: Vec<Vec<&str>>, |
3124 | | schema: &SchemaRef, |
3125 | | ) -> Self { |
3126 | | let properties = self.make_props(orderings, vec![], schema); |
3127 | | self.child_properties.push(properties); |
3128 | | self |
3129 | | } |
3130 | | |
3131 | | /// Add a union input with the specified orderings and constant |
3132 | | /// equivalences |
3133 | | /// |
3134 | | /// See [`Self::make_props`] for the format of the strings in |
3135 | | /// `orderings` and `constants` |
3136 | | fn with_child_sort_and_const_exprs( |
3137 | | mut self, |
3138 | | orderings: Vec<Vec<&str>>, |
3139 | | constants: Vec<&str>, |
3140 | | schema: &SchemaRef, |
3141 | | ) -> Self { |
3142 | | let properties = self.make_props(orderings, constants, schema); |
3143 | | self.child_properties.push(properties); |
3144 | | self |
3145 | | } |
3146 | | |
3147 | | /// Set the expected output sort order for the union of the children |
3148 | | /// |
3149 | | /// See [`Self::make_props`] for the format of the strings in `orderings` |
3150 | | fn with_expected_sort(mut self, orderings: Vec<Vec<&str>>) -> Self { |
3151 | | let properties = self.make_props(orderings, vec![], &self.output_schema); |
3152 | | self.expected_properties = Some(properties); |
3153 | | self |
3154 | | } |
3155 | | |
3156 | | /// Set the expected output sort order and constant expressions for the |
3157 | | /// union of the children |
3158 | | /// |
3159 | | /// See [`Self::make_props`] for the format of the strings in |
3160 | | /// `orderings` and `constants`. |
3161 | | fn with_expected_sort_and_const_exprs( |
3162 | | mut self, |
3163 | | orderings: Vec<Vec<&str>>, |
3164 | | constants: Vec<&str>, |
3165 | | ) -> Self { |
3166 | | let properties = self.make_props(orderings, constants, &self.output_schema); |
3167 | | self.expected_properties = Some(properties); |
3168 | | self |
3169 | | } |
3170 | | |
3171 | | /// compute the union's output equivalence properties from the child |
3172 | | /// properties, and compare them to the expected properties |
3173 | | fn run(self) { |
3174 | | let Self { |
3175 | | output_schema, |
3176 | | child_properties, |
3177 | | expected_properties, |
3178 | | } = self; |
3179 | | let expected_properties = |
3180 | | expected_properties.expect("expected_properties not set"); |
3181 | | let actual_properties = |
3182 | | calculate_union(child_properties, Arc::clone(&output_schema)) |
3183 | | .expect("failed to calculate union equivalence properties"); |
3184 | | assert_eq_properties_same( |
3185 | | &actual_properties, |
3186 | | &expected_properties, |
3187 | | format!( |
3188 | | "expected: {expected_properties:?}\nactual: {actual_properties:?}" |
3189 | | ), |
3190 | | ); |
3191 | | } |
3192 | | |
3193 | | /// Make equivalence properties for the specified columns named in orderings and constants |
3194 | | /// |
3195 | | /// orderings: strings formatted like `"a"` or `"a DESC"`. See [`parse_sort_expr`] |
3196 | | /// constants: strings formatted like `"a"`. |
3197 | | fn make_props( |
3198 | | &self, |
3199 | | orderings: Vec<Vec<&str>>, |
3200 | | constants: Vec<&str>, |
3201 | | schema: &SchemaRef, |
3202 | | ) -> EquivalenceProperties { |
3203 | | let orderings = orderings |
3204 | | .iter() |
3205 | | .map(|ordering| { |
3206 | | ordering |
3207 | | .iter() |
3208 | | .map(|name| parse_sort_expr(name, schema)) |
3209 | | .collect::<Vec<_>>() |
3210 | | }) |
3211 | | .collect::<Vec<_>>(); |
3212 | | |
3213 | | let constants = constants |
3214 | | .iter() |
3215 | | .map(|col_name| ConstExpr::new(col(col_name, schema).unwrap())) |
3216 | | .collect::<Vec<_>>(); |
3217 | | |
3218 | | EquivalenceProperties::new_with_orderings(Arc::clone(schema), &orderings) |
3219 | | .with_constants(constants) |
3220 | | } |
3221 | | } |
3222 | | |
    /// Asserts that `lhs` and `rhs` have the same constants and the same set
    /// of orderings (order-insensitive), panicking with `err_msg` otherwise.
    fn assert_eq_properties_same(
        lhs: &EquivalenceProperties,
        rhs: &EquivalenceProperties,
        err_msg: String,
    ) {
        // Check whether constants are same
        // (rhs ⊆ lhs plus equal lengths implies set equality, assuming no
        // duplicate constants on either side)
        let lhs_constants = lhs.constants();
        let rhs_constants = rhs.constants();
        for rhs_constant in rhs_constants {
            assert!(
                const_exprs_contains(lhs_constants, rhs_constant.expr()),
                "{err_msg}\nlhs: {lhs}\nrhs: {rhs}"
            );
        }
        assert_eq!(
            lhs_constants.len(),
            rhs_constants.len(),
            "{err_msg}\nlhs: {lhs}\nrhs: {rhs}"
        );

        // Check whether orderings are same.
        let lhs_orderings = lhs.oeq_class();
        // NOTE(review): direct field access (`rhs.oeq_class.orderings`) here
        // vs. the `oeq_class()` accessor above — consider unifying on the
        // accessor if it exposes an equivalent view of the orderings.
        let rhs_orderings = &rhs.oeq_class.orderings;
        for rhs_ordering in rhs_orderings {
            assert!(
                lhs_orderings.contains(rhs_ordering),
                "{err_msg}\nlhs: {lhs}\nrhs: {rhs}"
            );
        }
        assert_eq!(
            lhs_orderings.len(),
            rhs_orderings.len(),
            "{err_msg}\nlhs: {lhs}\nrhs: {rhs}"
        );
    }
3258 | | |
3259 | | /// Converts a string to a physical sort expression |
3260 | | /// |
3261 | | /// # Example |
3262 | | /// * `"a"` -> (`"a"`, `SortOptions::default()`) |
3263 | | /// * `"a ASC"` -> (`"a"`, `SortOptions { descending: false, nulls_first: false }`) |
3264 | | fn parse_sort_expr(name: &str, schema: &SchemaRef) -> PhysicalSortExpr { |
3265 | | let mut parts = name.split_whitespace(); |
3266 | | let name = parts.next().expect("empty sort expression"); |
3267 | | let mut sort_expr = PhysicalSortExpr::new( |
3268 | | col(name, schema).expect("invalid column name"), |
3269 | | SortOptions::default(), |
3270 | | ); |
3271 | | |
3272 | | if let Some(options) = parts.next() { |
3273 | | sort_expr = match options { |
3274 | | "ASC" => sort_expr.asc(), |
3275 | | "DESC" => sort_expr.desc(), |
3276 | | _ => panic!( |
3277 | | "unknown sort options. Expected 'ASC' or 'DESC', got {}", |
3278 | | options |
3279 | | ), |
3280 | | } |
3281 | | } |
3282 | | |
3283 | | assert!( |
3284 | | parts.next().is_none(), |
3285 | | "unexpected tokens in column name. Expected 'name' / 'name ASC' / 'name DESC' but got '{name}'" |
3286 | | ); |
3287 | | |
3288 | | sort_expr |
3289 | | } |
3290 | | } |