Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-expr/src/equivalence/properties.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::fmt::Display;
19
use std::hash::{Hash, Hasher};
20
use std::sync::Arc;
21
22
use super::ordering::collapse_lex_ordering;
23
use crate::equivalence::class::const_exprs_contains;
24
use crate::equivalence::{
25
    collapse_lex_req, EquivalenceClass, EquivalenceGroup, OrderingEquivalenceClass,
26
    ProjectionMapping,
27
};
28
use crate::expressions::{with_new_schema, CastExpr, Column, Literal};
29
use crate::{
30
    physical_exprs_contains, ConstExpr, LexOrdering, LexOrderingRef, LexRequirement,
31
    LexRequirementRef, PhysicalExpr, PhysicalExprRef, PhysicalSortExpr,
32
    PhysicalSortRequirement,
33
};
34
35
use arrow_schema::{SchemaRef, SortOptions};
36
use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
37
use datafusion_common::{internal_err, plan_err, JoinSide, JoinType, Result};
38
use datafusion_expr::interval_arithmetic::Interval;
39
use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
40
use datafusion_physical_expr_common::utils::ExprPropertiesNode;
41
42
use indexmap::{IndexMap, IndexSet};
43
use itertools::Itertools;
44
45
/// A `EquivalenceProperties` object stores information known about the output
46
/// of a plan node, that can be used to optimize the plan.
47
///
48
/// Currently, it keeps track of:
49
/// - Sort expressions (orderings)
50
/// - Equivalent expressions: expressions that are known to have same value.
51
/// - Constant expressions: expressions that are known to contain a single
52
///   constant value.
53
///
54
/// # Example equivalent sort expressions
55
///
56
/// Consider table below:
57
///
58
/// ```text
59
/// ┌-------┐
60
/// | a | b |
61
/// |---|---|
62
/// | 1 | 9 |
63
/// | 2 | 8 |
64
/// | 3 | 7 |
65
/// | 5 | 5 |
66
/// └---┴---┘
67
/// ```
68
///
69
/// In this case, both `a ASC` and `b DESC` can describe the table ordering.
70
/// `EquivalenceProperties` tracks these different valid sort expressions and
71
/// treats `a ASC` and `b DESC` on an equal footing. For example, if the query
72
/// specifies the output sorted by EITHER `a ASC` or `b DESC`, the sort can be
73
/// avoided.
74
///
75
/// # Example equivalent expressions
76
///
77
/// Similarly, consider the table below:
78
///
79
/// ```text
80
/// ┌-------┐
81
/// | a | b |
82
/// |---|---|
83
/// | 1 | 1 |
84
/// | 2 | 2 |
85
/// | 3 | 3 |
86
/// | 5 | 5 |
87
/// └---┴---┘
88
/// ```
89
///
90
/// In this case, columns `a` and `b` always have the same value. We keep track of
91
/// such equivalences inside this object. With this information, DataFusion can
92
/// optimize various operations. For example, if the partition requirement is
93
/// `Hash(a)` and output partitioning is `Hash(b)`, then DataFusion avoids
94
/// repartitioning the data as the existing partitioning satisfies the
95
/// requirement.
96
///
97
/// # Code Example
98
/// ```
99
/// # use std::sync::Arc;
100
/// # use arrow_schema::{Schema, Field, DataType, SchemaRef};
101
/// # use datafusion_physical_expr::{ConstExpr, EquivalenceProperties};
102
/// # use datafusion_physical_expr::expressions::col;
103
/// use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr;
104
/// # let schema: SchemaRef = Arc::new(Schema::new(vec![
105
/// #   Field::new("a", DataType::Int32, false),
106
/// #   Field::new("b", DataType::Int32, false),
107
/// #   Field::new("c", DataType::Int32, false),
108
/// # ]));
109
/// # let col_a = col("a", &schema).unwrap();
110
/// # let col_b = col("b", &schema).unwrap();
111
/// # let col_c = col("c", &schema).unwrap();
112
/// // This object represents data that is sorted by a ASC, c DESC
113
/// // with a single constant value of b
114
/// let mut eq_properties = EquivalenceProperties::new(schema)
115
///   .with_constants(vec![ConstExpr::from(col_b)]);
116
/// eq_properties.add_new_ordering(vec![
117
///   PhysicalSortExpr::new_default(col_a).asc(),
118
///   PhysicalSortExpr::new_default(col_c).desc(),
119
/// ]);
120
///
121
/// assert_eq!(eq_properties.to_string(), "order: [[a@0 ASC,c@2 DESC]], const: [b@1]")
122
/// ```
123
#[derive(Debug, Clone)]
124
pub struct EquivalenceProperties {
125
    /// Collection of equivalence classes that store expressions with the same
126
    /// value.
127
    pub eq_group: EquivalenceGroup,
128
    /// Equivalent sort expressions for this table.
129
    pub oeq_class: OrderingEquivalenceClass,
130
    /// Expressions whose values are constant throughout the table.
131
    /// TODO: We do not need to track constants separately, they can be tracked
132
    ///       inside `eq_groups` as `Literal` expressions.
133
    pub constants: Vec<ConstExpr>,
134
    /// Schema associated with this object.
135
    schema: SchemaRef,
136
}
137
138
impl EquivalenceProperties {
139
    /// Creates an empty `EquivalenceProperties` object.
140
1.21k
    pub fn new(schema: SchemaRef) -> Self {
141
1.21k
        Self {
142
1.21k
            eq_group: EquivalenceGroup::empty(),
143
1.21k
            oeq_class: OrderingEquivalenceClass::empty(),
144
1.21k
            constants: vec![],
145
1.21k
            schema,
146
1.21k
        }
147
1.21k
    }
148
149
    /// Creates a new `EquivalenceProperties` object with the given orderings.
150
1.89k
    pub fn new_with_orderings(schema: SchemaRef, orderings: &[LexOrdering]) -> Self {
151
1.89k
        Self {
152
1.89k
            eq_group: EquivalenceGroup::empty(),
153
1.89k
            oeq_class: OrderingEquivalenceClass::new(orderings.to_vec()),
154
1.89k
            constants: vec![],
155
1.89k
            schema,
156
1.89k
        }
157
1.89k
    }
158
159
    /// Returns the associated schema.
160
26.2k
    pub fn schema(&self) -> &SchemaRef {
161
26.2k
        &self.schema
162
26.2k
    }
163
164
    /// Returns a reference to the ordering equivalence class within.
165
4.73k
    pub fn oeq_class(&self) -> &OrderingEquivalenceClass {
166
4.73k
        &self.oeq_class
167
4.73k
    }
168
169
    /// Returns a reference to the equivalence group within.
170
2.61k
    pub fn eq_group(&self) -> &EquivalenceGroup {
171
2.61k
        &self.eq_group
172
2.61k
    }
173
174
    /// Returns a reference to the constant expressions
175
4.73k
    pub fn constants(&self) -> &[ConstExpr] {
176
4.73k
        &self.constants
177
4.73k
    }
178
179
    /// Returns the output ordering of the properties.
180
4.73k
    pub fn output_ordering(&self) -> Option<LexOrdering> {
181
4.73k
        let constants = self.constants();
182
4.73k
        let mut output_ordering = self.oeq_class().output_ordering().unwrap_or_default();
183
4.73k
        // Prune out constant expressions
184
4.73k
        output_ordering
185
4.73k
            .retain(|sort_expr| 
!const_exprs_contains(constants, &sort_expr.expr)2.18k
);
186
4.73k
        (!output_ordering.is_empty()).then_some(output_ordering)
187
4.73k
    }
188
189
    /// Returns the normalized version of the ordering equivalence class within.
190
    /// Normalization removes constants and duplicates as well as standardizing
191
    /// expressions according to the equivalence group within.
192
1.63k
    pub fn normalized_oeq_class(&self) -> OrderingEquivalenceClass {
193
1.63k
        OrderingEquivalenceClass::new(
194
1.63k
            self.oeq_class
195
1.63k
                .iter()
196
1.63k
                .map(|ordering| 
self.normalize_sort_exprs(ordering)947
)
197
1.63k
                .collect(),
198
1.63k
        )
199
1.63k
    }
200
201
    /// Extends this `EquivalenceProperties` with the `other` object.
202
3
    pub fn extend(mut self, other: Self) -> Self {
203
3
        self.eq_group.extend(other.eq_group);
204
3
        self.oeq_class.extend(other.oeq_class);
205
3
        self.with_constants(other.constants)
206
3
    }
207
208
    /// Clears (empties) the ordering equivalence class within this object.
209
    /// Call this method when existing orderings are invalidated.
210
80
    pub fn clear_orderings(&mut self) {
211
80
        self.oeq_class.clear();
212
80
    }
213
214
    /// Removes constant expressions that may change across partitions.
215
    /// This method should be used when data from different partitions are merged.
216
96
    pub fn clear_per_partition_constants(&mut self) {
217
96
        self.constants.retain(|item| 
item.across_partitions()0
);
218
96
    }
219
220
    /// Extends this `EquivalenceProperties` by adding the orderings inside the
221
    /// ordering equivalence class `other`.
222
454
    pub fn add_ordering_equivalence_class(&mut self, other: OrderingEquivalenceClass) {
223
454
        self.oeq_class.extend(other);
224
454
    }
225
226
    /// Adds new orderings into the existing ordering equivalence class.
227
24
    pub fn add_new_orderings(
228
24
        &mut self,
229
24
        orderings: impl IntoIterator<Item = LexOrdering>,
230
24
    ) {
231
24
        self.oeq_class.add_new_orderings(orderings);
232
24
    }
233
234
    /// Adds a single ordering to the existing ordering equivalence class.
235
0
    pub fn add_new_ordering(&mut self, ordering: LexOrdering) {
236
0
        self.add_new_orderings([ordering]);
237
0
    }
238
239
    /// Incorporates the given equivalence group into the existing
240
    /// equivalence group within.
241
1.14k
    pub fn add_equivalence_group(&mut self, other_eq_group: EquivalenceGroup) {
242
1.14k
        self.eq_group.extend(other_eq_group);
243
1.14k
    }
244
245
    /// Adds a new equality condition into the existing equivalence group.
246
    /// If the given equality defines a new equivalence class, adds this new
247
    /// equivalence class to the equivalence group.
248
5
    pub fn add_equal_conditions(
249
5
        &mut self,
250
5
        left: &Arc<dyn PhysicalExpr>,
251
5
        right: &Arc<dyn PhysicalExpr>,
252
5
    ) -> Result<()> {
253
5
        // Discover new constants in light of the new equality:
254
5
        if self.is_expr_constant(left) {
255
            // Left expression is constant, add right as constant
256
0
            if !const_exprs_contains(&self.constants, right) {
257
0
                self.constants
258
0
                    .push(ConstExpr::from(right).with_across_partitions(true));
259
0
            }
260
5
        } else if self.is_expr_constant(right) {
261
            // Right expression is constant, add left as constant
262
4
            if !const_exprs_contains(&self.constants, left) {
263
4
                self.constants
264
4
                    .push(ConstExpr::from(left).with_across_partitions(true));
265
4
            }
0
266
1
        }
267
268
        // Add equal expressions to the state
269
5
        self.eq_group.add_equal_conditions(left, right);
270
5
271
5
        // Discover any new orderings
272
5
        self.discover_new_orderings(left)
?0
;
273
5
        Ok(())
274
5
    }
275
276
    /// Track/register physical expressions with constant values.
277
    #[deprecated(since = "43.0.0", note = "Use [`with_constants`] instead")]
278
0
    pub fn add_constants(self, constants: impl IntoIterator<Item = ConstExpr>) -> Self {
279
0
        self.with_constants(constants)
280
0
    }
281
282
    /// Track/register physical expressions with constant values.
283
789
    pub fn with_constants(
284
789
        mut self,
285
789
        constants: impl IntoIterator<Item = ConstExpr>,
286
789
    ) -> Self {
287
789
        let (const_exprs, across_partition_flags): (
288
789
            Vec<Arc<dyn PhysicalExpr>>,
289
789
            Vec<bool>,
290
789
        ) = constants
291
789
            .into_iter()
292
789
            .map(|const_expr| {
293
276
                let across_partitions = const_expr.across_partitions();
294
276
                let expr = const_expr.owned_expr();
295
276
                (expr, across_partitions)
296
789
            })
297
789
            .unzip();
298
789
        for (
expr, across_partitions276
) in self
299
789
            .eq_group
300
789
            .normalize_exprs(const_exprs)
301
789
            .into_iter()
302
789
            .zip(across_partition_flags)
303
        {
304
276
            if !const_exprs_contains(&self.constants, &expr) {
305
268
                let const_expr =
306
268
                    ConstExpr::from(expr).with_across_partitions(across_partitions);
307
268
                self.constants.push(const_expr);
308
268
            }
8
309
        }
310
311
789
        for 
ordering311
in self.normalized_oeq_class().iter() {
312
311
            if let Err(
e0
) = self.discover_new_orderings(&ordering[0].expr) {
313
0
                log::debug!("error discovering new orderings: {e}");
314
311
            }
315
        }
316
317
789
        self
318
789
    }
319
320
    // Discover new valid orderings in light of a new equality.
321
    // Accepts a single argument (`expr`) which is used to determine
322
    // which orderings should be updated.
323
    // When constants or equivalence classes are changed, there may be new orderings
324
    // that can be discovered with the new equivalence properties.
325
    // For a discussion, see: https://github.com/apache/datafusion/issues/9812
326
316
    fn discover_new_orderings(&mut self, expr: &Arc<dyn PhysicalExpr>) -> Result<()> {
327
316
        let normalized_expr = self.eq_group().normalize_expr(Arc::clone(expr));
328
316
        let eq_class = self
329
316
            .eq_group
330
316
            .classes
331
316
            .iter()
332
316
            .find_map(|class| {
333
5
                class
334
5
                    .contains(&normalized_expr)
335
5
                    .then(|| class.clone().into_vec())
336
316
            })
337
316
            .unwrap_or_else(|| 
vec![Arc::clone(&normalized_expr)]311
);
338
316
339
316
        let mut new_orderings: Vec<LexOrdering> = vec![];
340
316
        for (
ordering, next_expr184
) in self
341
316
            .normalized_oeq_class()
342
316
            .iter()
343
331
            .filter(|ordering| ordering[0].expr.eq(&normalized_expr))
344
316
            // First expression after leading ordering
345
316
            .filter_map(|ordering| 
Some(ordering).zip(ordering.get(1))313
)
346
        {
347
184
            let leading_ordering = ordering[0].options;
348
            // Currently, we only handle expressions with a single child.
349
            // TODO: It should be possible to handle expressions orderings like
350
            //       f(a, b, c), a, b, c if f is monotonic in all arguments.
351
368
            for 
equivalent_expr184
in &eq_class {
352
184
                let children = equivalent_expr.children();
353
184
                if children.len() == 1
354
0
                    && children[0].eq(&next_expr.expr)
355
0
                    && SortProperties::Ordered(leading_ordering)
356
0
                        == equivalent_expr
357
0
                            .get_properties(&[ExprProperties {
358
0
                                sort_properties: SortProperties::Ordered(
359
0
                                    leading_ordering,
360
0
                                ),
361
0
                                range: Interval::make_unbounded(
362
0
                                    &equivalent_expr.data_type(&self.schema)?,
363
0
                                )?,
364
0
                            }])?
365
                            .sort_properties
366
                {
367
                    // Assume existing ordering is [a ASC, b ASC]
368
                    // When the equality `a = f(b)` is given, if we know that, given ordering `[b ASC]`, ordering `[f(b) ASC]` is valid,
369
                    // then we can deduce that ordering `[b ASC]` is also valid.
370
                    // Hence, ordering `[b ASC]` can be added to the state as valid ordering.
371
                    // (e.g. existing ordering where leading ordering is removed)
372
0
                    new_orderings.push(ordering[1..].to_vec());
373
0
                    break;
374
184
                }
375
            }
376
        }
377
378
316
        self.oeq_class.add_new_orderings(new_orderings);
379
316
        Ok(())
380
316
    }
381
382
    /// Updates the ordering equivalence group within assuming that the table
383
    /// is re-sorted according to the argument `sort_exprs`. Note that constants
384
    /// and equivalence classes are unchanged as they are unaffected by a re-sort.
385
35
    pub fn with_reorder(mut self, sort_exprs: Vec<PhysicalSortExpr>) -> Self {
386
35
        // TODO: In some cases, existing ordering equivalences may still be valid; add this analysis.
387
35
        self.oeq_class = OrderingEquivalenceClass::new(vec![sort_exprs]);
388
35
        self
389
35
    }
390
391
    /// Normalizes the given sort expressions (i.e. `sort_exprs`) using the
392
    /// equivalence group and the ordering equivalence class within.
393
    ///
394
    /// Assume that `self.eq_group` states column `a` and `b` are aliases.
395
    /// Also assume that `self.oeq_class` states orderings `d ASC` and `a ASC, c ASC`
396
    /// are equivalent (in the sense that both describe the ordering of the table).
397
    /// If the `sort_exprs` argument were `vec![b ASC, c ASC, a ASC]`, then this
398
    /// function would return `vec![a ASC, c ASC]`. Internally, it would first
399
    /// normalize to `vec![a ASC, c ASC, a ASC]` and end up with the final result
400
    /// after deduplication.
401
947
    fn normalize_sort_exprs(&self, sort_exprs: LexOrderingRef) -> LexOrdering {
402
947
        // Convert sort expressions to sort requirements:
403
947
        let sort_reqs = PhysicalSortRequirement::from_sort_exprs(sort_exprs.iter());
404
947
        // Normalize the requirements:
405
947
        let normalized_sort_reqs = self.normalize_sort_requirements(&sort_reqs);
406
947
        // Convert sort requirements back to sort expressions:
407
947
        PhysicalSortRequirement::to_sort_exprs(normalized_sort_reqs)
408
947
    }
409
410
    /// Normalizes the given sort requirements (i.e. `sort_reqs`) using the
411
    /// equivalence group and the ordering equivalence class within. It works by:
412
    /// - Removing expressions that have a constant value from the given requirement.
413
    /// - Replacing sections that belong to some equivalence class in the equivalence
414
    ///   group with the first entry in the matching equivalence class.
415
    ///
416
    /// Assume that `self.eq_group` states column `a` and `b` are aliases.
417
    /// Also assume that `self.oeq_class` states orderings `d ASC` and `a ASC, c ASC`
418
    /// are equivalent (in the sense that both describe the ordering of the table).
419
    /// If the `sort_reqs` argument were `vec![b ASC, c ASC, a ASC]`, then this
420
    /// function would return `vec![a ASC, c ASC]`. Internally, it would first
421
    /// normalize to `vec![a ASC, c ASC, a ASC]` and end up with the final result
422
    /// after deduplication.
423
1.25k
    fn normalize_sort_requirements(
424
1.25k
        &self,
425
1.25k
        sort_reqs: LexRequirementRef,
426
1.25k
    ) -> LexRequirement {
427
1.25k
        let normalized_sort_reqs = self.eq_group.normalize_sort_requirements(sort_reqs);
428
1.25k
        let mut constant_exprs = vec![];
429
1.25k
        constant_exprs.extend(
430
1.25k
            self.constants
431
1.25k
                .iter()
432
1.25k
                .map(|const_expr| 
Arc::clone(const_expr.expr())1.17k
),
433
1.25k
        );
434
1.25k
        let constants_normalized = self.eq_group.normalize_exprs(constant_exprs);
435
1.25k
        // Prune redundant sections in the requirement:
436
1.25k
        collapse_lex_req(
437
1.25k
            normalized_sort_reqs
438
1.25k
                .iter()
439
3.41k
                .filter(|&order| {
440
3.41k
                    !physical_exprs_contains(&constants_normalized, &order.expr)
441
3.41k
                })
442
1.25k
                .cloned()
443
1.25k
                .collect(),
444
1.25k
        )
445
1.25k
    }
446
447
    /// Checks whether the given ordering is satisfied by any of the existing
448
    /// orderings.
449
63
    pub fn ordering_satisfy(&self, given: LexOrderingRef) -> bool {
450
63
        // Convert the given sort expressions to sort requirements:
451
63
        let sort_requirements = PhysicalSortRequirement::from_sort_exprs(given.iter());
452
63
        self.ordering_satisfy_requirement(&sort_requirements)
453
63
    }
454
455
    /// Checks whether the given sort requirements are satisfied by any of the
456
    /// existing orderings.
457
199
    pub fn ordering_satisfy_requirement(&self, reqs: LexRequirementRef) -> bool {
458
199
        let mut eq_properties = self.clone();
459
199
        // First, standardize the given requirement:
460
199
        let normalized_reqs = eq_properties.normalize_sort_requirements(reqs);
461
268
        for 
normalized_req149
in normalized_reqs {
462
            // Check whether given ordering is satisfied
463
149
            if !eq_properties.ordering_satisfy_single(&normalized_req) {
464
80
                return false;
465
69
            }
466
69
            // Treat satisfied keys as constants in subsequent iterations. We
467
69
            // can do this because the "next" key only matters in a lexicographical
468
69
            // ordering when the keys to its left have the same values.
469
69
            //
470
69
            // Note that these expressions are not properly "constants". This is just
471
69
            // an implementation strategy confined to this function.
472
69
            //
473
69
            // For example, assume that the requirement is `[a ASC, (b + c) ASC]`,
474
69
            // and existing equivalent orderings are `[a ASC, b ASC]` and `[c ASC]`.
475
69
            // From the analysis above, we know that `[a ASC]` is satisfied. Then,
476
69
            // we add column `a` as constant to the algorithm state. This enables us
477
69
            // to deduce that `(b + c) ASC` is satisfied, given `a` is constant.
478
69
            eq_properties = eq_properties
479
69
                .with_constants(std::iter::once(ConstExpr::from(normalized_req.expr)));
480
        }
481
119
        true
482
199
    }
483
484
    /// Determines whether the ordering specified by the given sort requirement
485
    /// is satisfied based on the orderings within, equivalence classes, and
486
    /// constant expressions.
487
    ///
488
    /// # Arguments
489
    ///
490
    /// - `req`: A reference to a `PhysicalSortRequirement` for which the ordering
491
    ///   satisfaction check will be done.
492
    ///
493
    /// # Returns
494
    ///
495
    /// Returns `true` if the specified ordering is satisfied, `false` otherwise.
496
149
    fn ordering_satisfy_single(&self, req: &PhysicalSortRequirement) -> bool {
497
149
        let ExprProperties {
498
149
            sort_properties, ..
499
149
        } = self.get_expr_properties(Arc::clone(&req.expr));
500
149
        match sort_properties {
501
74
            SortProperties::Ordered(options) => {
502
74
                let sort_expr = PhysicalSortExpr {
503
74
                    expr: Arc::clone(&req.expr),
504
74
                    options,
505
74
                };
506
74
                sort_expr.satisfy(req, self.schema())
507
            }
508
            // Singleton expressions satisfy any ordering.
509
0
            SortProperties::Singleton => true,
510
75
            SortProperties::Unordered => false,
511
        }
512
149
    }
513
514
    /// Checks whether the `given` sort requirements are equal or more specific
515
    /// than the `reference` sort requirements.
516
0
    pub fn requirements_compatible(
517
0
        &self,
518
0
        given: LexRequirementRef,
519
0
        reference: LexRequirementRef,
520
0
    ) -> bool {
521
0
        let normalized_given = self.normalize_sort_requirements(given);
522
0
        let normalized_reference = self.normalize_sort_requirements(reference);
523
0
524
0
        (normalized_reference.len() <= normalized_given.len())
525
0
            && normalized_reference
526
0
                .into_iter()
527
0
                .zip(normalized_given)
528
0
                .all(|(reference, given)| given.compatible(&reference))
529
0
    }
530
531
    /// Returns the finer ordering among the orderings `lhs` and `rhs`, breaking
532
    /// any ties by choosing `lhs`.
533
    ///
534
    /// The finer ordering is the ordering that satisfies both of the orderings.
535
    /// If the orderings are incomparable, returns `None`.
536
    ///
537
    /// For example, the finer ordering among `[a ASC]` and `[a ASC, b ASC]` is
538
    /// the latter.
539
54
    pub fn get_finer_ordering(
540
54
        &self,
541
54
        lhs: LexOrderingRef,
542
54
        rhs: LexOrderingRef,
543
54
    ) -> Option<LexOrdering> {
544
54
        // Convert the given sort expressions to sort requirements:
545
54
        let lhs = PhysicalSortRequirement::from_sort_exprs(lhs);
546
54
        let rhs = PhysicalSortRequirement::from_sort_exprs(rhs);
547
54
        let finer = self.get_finer_requirement(&lhs, &rhs);
548
54
        // Convert the chosen sort requirements back to sort expressions:
549
54
        finer.map(PhysicalSortRequirement::to_sort_exprs)
550
54
    }
551
552
    /// Returns the finer ordering among the requirements `lhs` and `rhs`,
553
    /// breaking any ties by choosing `lhs`.
554
    ///
555
    /// The finer requirements are the ones that satisfy both of the given
556
    /// requirements. If the requirements are incomparable, returns `None`.
557
    ///
558
    /// For example, the finer requirements among `[a ASC]` and `[a ASC, b ASC]`
559
    /// is the latter.
560
54
    pub fn get_finer_requirement(
561
54
        &self,
562
54
        req1: LexRequirementRef,
563
54
        req2: LexRequirementRef,
564
54
    ) -> Option<LexRequirement> {
565
54
        let mut lhs = self.normalize_sort_requirements(req1);
566
54
        let mut rhs = self.normalize_sort_requirements(req2);
567
54
        lhs.inner
568
54
            .iter_mut()
569
54
            .zip(rhs.inner.iter_mut())
570
54
            .all(|(lhs, rhs)| {
571
6
                lhs.expr.eq(&rhs.expr)
572
6
                    && match (lhs.options, rhs.options) {
573
6
                        (Some(lhs_opt), Some(rhs_opt)) => lhs_opt == rhs_opt,
574
0
                        (Some(options), None) => {
575
0
                            rhs.options = Some(options);
576
0
                            true
577
                        }
578
0
                        (None, Some(options)) => {
579
0
                            lhs.options = Some(options);
580
0
                            true
581
                        }
582
0
                        (None, None) => true,
583
                    }
584
54
            
}6
)
585
54
            .then_some(if lhs.len() >= rhs.len() { 
lhs48
} else {
rhs6
})
586
54
    }
587
588
    /// Substitutes the ordering according to the input expression type. This is a simplified version:
589
    /// we only substitute when the expression satisfies the following condition:
590
    /// I. It has just one column and is a CAST expression.
591
    /// TODO: Add one-to-one analysis for monotonic ScalarFunctions.
592
    /// TODO: We could precompute all the scenarios that are computable; for example, atan(x + 1000) should also be substituted if
593
    ///  x is DESC or ASC.
594
    /// After substitution, we may generate more than 1 `LexOrdering`. As an example,
595
    /// `[a ASC, b ASC]` will turn into `[a ASC, b ASC], [CAST(a) ASC, b ASC]` when projection expressions `a, b, CAST(a)` are applied.
596
1
    pub fn substitute_ordering_component(
597
1
        &self,
598
1
        mapping: &ProjectionMapping,
599
1
        sort_expr: &[PhysicalSortExpr],
600
1
    ) -> Result<Vec<Vec<PhysicalSortExpr>>> {
601
1
        let new_orderings = sort_expr
602
1
            .iter()
603
1
            .map(|sort_expr| {
604
1
                let referring_exprs: Vec<_> = mapping
605
1
                    .iter()
606
3
                    .map(|(source, _target)| source)
607
3
                    .filter(|source| expr_refers(source, &sort_expr.expr))
608
1
                    .cloned()
609
1
                    .collect();
610
1
                let mut res = vec![sort_expr.clone()];
611
                // TODO: Add one-to-ones analysis for ScalarFunctions.
612
2
                for 
r_expr1
in referring_exprs {
613
                    // we check whether this expression is substitutable or not
614
1
                    if let Some(
cast_expr0
) = r_expr.as_any().downcast_ref::<CastExpr>() {
615
                        // we need to know whether the Cast Expr matches or not
616
0
                        let expr_type = sort_expr.expr.data_type(&self.schema)?;
617
0
                        if cast_expr.expr.eq(&sort_expr.expr)
618
0
                            && cast_expr.is_bigger_cast(expr_type)
619
0
                        {
620
0
                            res.push(PhysicalSortExpr {
621
0
                                expr: Arc::clone(&r_expr),
622
0
                                options: sort_expr.options,
623
0
                            });
624
0
                        }
625
1
                    }
626
                }
627
1
                Ok(res)
628
1
            })
629
1
            .collect::<Result<Vec<_>>>()
?0
;
630
        // Generate all valid orderings, given substituted expressions.
631
1
        let res = new_orderings
632
1
            .into_iter()
633
1
            .multi_cartesian_product()
634
1
            .collect::<Vec<_>>();
635
1
        Ok(res)
636
1
    }
637
638
    /// In projection, suppose we have an input ordering 'A DESC, B DESC' and the output shares the same expressions
639
    /// with A and B; we can then surely use the original ordering. However, if A has been changed,
640
    /// for example, A -> Cast(A, Int64) or any other form, it is invalid to continue using the original ordering.
641
    /// Since this would cause a bug in dependency construction, we should substitute the input ordering in order to get the correct
642
    /// dependency map. This happened in issue 8838: <https://github.com/apache/datafusion/issues/8838>
643
2
    pub fn substitute_oeq_class(&mut self, mapping: &ProjectionMapping) -> Result<()> {
644
2
        let orderings = &self.oeq_class.orderings;
645
2
        let new_order = orderings
646
2
            .iter()
647
2
            .map(|order| 
self.substitute_ordering_component(mapping, order)1
)
648
2
            .collect::<Result<Vec<_>>>()
?0
;
649
2
        let new_order = new_order.into_iter().flatten().collect();
650
2
        self.oeq_class = OrderingEquivalenceClass::new(new_order);
651
2
        Ok(())
652
2
    }
653
    /// Projects argument `expr` according to `projection_mapping`, taking
654
    /// equivalences into account.
655
    ///
656
    /// For example, assume that columns `a` and `c` are always equal, and that
657
    /// `projection_mapping` encodes following mapping:
658
    ///
659
    /// ```text
660
    /// a -> a1
661
    /// b -> b1
662
    /// ```
663
    ///
664
    /// Then, this function projects `a + b` to `Some(a1 + b1)`, `c + b` to
665
    /// `Some(a1 + b1)` and `d` to `None`, meaning that it  cannot be projected.
666
1
    pub fn project_expr(
667
1
        &self,
668
1
        expr: &Arc<dyn PhysicalExpr>,
669
1
        projection_mapping: &ProjectionMapping,
670
1
    ) -> Option<Arc<dyn PhysicalExpr>> {
671
1
        self.eq_group.project_expr(projection_mapping, expr)
672
1
    }
673
674
    /// Constructs a dependency map based on existing orderings referred to in
675
    /// the projection.
676
    ///
677
    /// This function analyzes the orderings in the normalized order-equivalence
678
    /// class and builds a dependency map. The dependency map captures relationships
679
    /// between expressions within the orderings, helping to identify dependencies
680
    /// and construct valid projected orderings during projection operations.
681
    ///
682
    /// # Parameters
683
    ///
684
    /// - `mapping`: A reference to the `ProjectionMapping` that defines the
685
    ///   relationship between source and target expressions.
686
    ///
687
    /// # Returns
688
    ///
689
    /// A [`DependencyMap`] representing the dependency map, where each
690
    /// [`DependencyNode`] contains dependencies for the key [`PhysicalSortExpr`].
691
    ///
692
    /// # Example
693
    ///
694
    /// Assume we have two equivalent orderings: `[a ASC, b ASC]` and `[a ASC, c ASC]`,
695
    /// and the projection mapping is `[a -> a_new, b -> b_new, b + c -> b + c]`.
696
    /// Then, the dependency map will be:
697
    ///
698
    /// ```text
699
    /// a ASC: Node {Some(a_new ASC), HashSet{}}
700
    /// b ASC: Node {Some(b_new ASC), HashSet{a ASC}}
701
    /// c ASC: Node {None, HashSet{a ASC}}
702
    /// ```
703
52
    fn construct_dependency_map(&self, mapping: &ProjectionMapping) -> DependencyMap {
704
52
        let mut dependency_map = IndexMap::new();
705
52
        for 
ordering1
in self.normalized_oeq_class().iter() {
706
1
            for (idx, sort_expr) in ordering.iter().enumerate() {
707
1
                let target_sort_expr =
708
1
                    self.project_expr(&sort_expr.expr, mapping).map(|expr| {
709
1
                        PhysicalSortExpr {
710
1
                            expr,
711
1
                            options: sort_expr.options,
712
1
                        }
713
1
                    });
714
1
                let is_projected = target_sort_expr.is_some();
715
1
                if is_projected
716
0
                    || mapping
717
0
                        .iter()
718
0
                        .any(|(source, _)| expr_refers(source, &sort_expr.expr))
719
1
                {
720
1
                    // Previous ordering is a dependency. Note that there is no,
721
1
                    // dependency for a leading ordering (i.e. the first sort
722
1
                    // expression).
723
1
                    let dependency = idx.checked_sub(1).map(|a| 
&ordering[a]0
);
724
1
                    // Add sort expressions that can be projected or referred to
725
1
                    // by any of the projection expressions to the dependency map:
726
1
                    dependency_map
727
1
                        .entry(sort_expr.clone())
728
1
                        .or_insert_with(|| DependencyNode {
729
1
                            target_sort_expr: target_sort_expr.clone(),
730
1
                            dependencies: IndexSet::new(),
731
1
                        })
732
1
                        .insert_dependency(dependency);
733
1
                }
0
734
1
                if !is_projected {
735
                    // If we can not project, stop constructing the dependency
736
                    // map as remaining dependencies will be invalid after projection.
737
0
                    break;
738
1
                }
739
            }
740
        }
741
52
        dependency_map
742
52
    }
743
744
    /// Returns a new `ProjectionMapping` where source expressions are normalized.
745
    ///
746
    /// This normalization ensures that source expressions are transformed into a
747
    /// consistent representation. This is beneficial for algorithms that rely on
748
    /// exact equalities, as it allows for more precise and reliable comparisons.
749
    ///
750
    /// # Parameters
751
    ///
752
    /// - `mapping`: A reference to the original `ProjectionMapping` to be normalized.
753
    ///
754
    /// # Returns
755
    ///
756
    /// A new `ProjectionMapping` with normalized source expressions.
757
52
    fn normalized_mapping(&self, mapping: &ProjectionMapping) -> ProjectionMapping {
758
52
        // Construct the mapping where source expressions are normalized. In this way
759
52
        // In the algorithms below we can work on exact equalities
760
52
        ProjectionMapping {
761
52
            map: mapping
762
52
                .iter()
763
61
                .map(|(source, target)| {
764
61
                    let normalized_source =
765
61
                        self.eq_group.normalize_expr(Arc::clone(source));
766
61
                    (normalized_source, Arc::clone(target))
767
61
                })
768
52
                .collect(),
769
52
        }
770
52
    }
771
772
    /// Computes projected orderings based on a given projection mapping.
773
    ///
774
    /// This function takes a `ProjectionMapping` and computes the possible
775
    /// orderings for the projected expressions. It considers dependencies
776
    /// between expressions and generates valid orderings according to the
777
    /// specified sort properties.
778
    ///
779
    /// # Parameters
780
    ///
781
    /// - `mapping`: A reference to the `ProjectionMapping` that defines the
782
    ///   relationship between source and target expressions.
783
    ///
784
    /// # Returns
785
    ///
786
    /// A vector of `LexOrdering` containing all valid orderings after projection.
787
52
    fn projected_orderings(&self, mapping: &ProjectionMapping) -> Vec<LexOrdering> {
788
52
        let mapping = self.normalized_mapping(mapping);
789
52
790
52
        // Get dependency map for existing orderings:
791
52
        let dependency_map = self.construct_dependency_map(&mapping);
792
61
        let orderings = mapping.iter().flat_map(|(source, target)| {
793
61
            referred_dependencies(&dependency_map, source)
794
61
                .into_iter()
795
61
                .filter_map(|relevant_deps| {
796
1
                    if let Ok(SortProperties::Ordered(options)) =
797
61
                        get_expr_properties(source, &relevant_deps, &self.schema)
798
61
                            .map(|prop| prop.sort_properties)
799
                    {
800
1
                        Some((options, relevant_deps))
801
                    } else {
802
                        // Do not consider unordered cases
803
60
                        None
804
                    }
805
61
                })
806
61
                .flat_map(|(options, relevant_deps)| {
807
1
                    let sort_expr = PhysicalSortExpr {
808
1
                        expr: Arc::clone(target),
809
1
                        options,
810
1
                    };
811
1
                    // Generate dependent orderings (i.e. prefixes for `sort_expr`):
812
1
                    let mut dependency_orderings =
813
1
                        generate_dependency_orderings(&relevant_deps, &dependency_map);
814
                    // Append `sort_expr` to the dependent orderings:
815
1
                    for ordering in dependency_orderings.iter_mut() {
816
1
                        ordering.push(sort_expr.clone());
817
1
                    }
818
1
                    dependency_orderings
819
61
                })
820
61
        });
821
52
822
52
        // Add valid projected orderings. For example, if existing ordering is
823
52
        // `a + b` and projection is `[a -> a_new, b -> b_new]`, we need to
824
52
        // preserve `a_new + b_new` as ordered. Please note that `a_new` and
825
52
        // `b_new` themselves need not be ordered. Such dependencies cannot be
826
52
        // deduced via the pass above.
827
52
        let projected_orderings = dependency_map.iter().flat_map(|(sort_expr, node)| {
828
1
            let mut prefixes = construct_prefix_orderings(sort_expr, &dependency_map);
829
1
            if prefixes.is_empty() {
830
1
                // If prefix is empty, there is no dependency. Insert
831
1
                // empty ordering:
832
1
                prefixes = vec![vec![]];
833
1
            }
0
834
            // Append current ordering on top its dependencies:
835
1
            for ordering in prefixes.iter_mut() {
836
1
                if let Some(target) = &node.target_sort_expr {
837
1
                    ordering.push(target.clone())
838
0
                }
839
            }
840
1
            prefixes
841
52
        });
842
52
843
52
        // Simplify each ordering by removing redundant sections:
844
52
        orderings
845
52
            .chain(projected_orderings)
846
52
            .map(collapse_lex_ordering)
847
52
            .collect()
848
52
    }
849
850
    /// Projects constants based on the provided `ProjectionMapping`.
851
    ///
852
    /// This function takes a `ProjectionMapping` and identifies/projects
853
    /// constants based on the existing constants and the mapping. It ensures
854
    /// that constants are appropriately propagated through the projection.
855
    ///
856
    /// # Arguments
857
    ///
858
    /// - `mapping`: A reference to a `ProjectionMapping` representing the
859
    ///   mapping of source expressions to target expressions in the projection.
860
    ///
861
    /// # Returns
862
    ///
863
    /// Returns a `Vec<Arc<dyn PhysicalExpr>>` containing the projected constants.
864
52
    fn projected_constants(&self, mapping: &ProjectionMapping) -> Vec<ConstExpr> {
865
52
        // First, project existing constants. For example, assume that `a + b`
866
52
        // is known to be constant. If the projection were `a as a_new`, `b as b_new`,
867
52
        // then we would project constant `a + b` as `a_new + b_new`.
868
52
        let mut projected_constants = self
869
52
            .constants
870
52
            .iter()
871
52
            .flat_map(|const_expr| {
872
0
                const_expr.map(|expr| self.eq_group.project_expr(mapping, expr))
873
52
            })
874
52
            .collect::<Vec<_>>();
875
        // Add projection expressions that are known to be constant:
876
61
        for (source, target) in 
mapping.iter()52
{
877
61
            if self.is_expr_constant(source)
878
1
                && !const_exprs_contains(&projected_constants, target)
879
1
            {
880
1
                // Expression evaluates to single value
881
1
                projected_constants
882
1
                    .push(ConstExpr::from(target).with_across_partitions(true));
883
60
            }
884
        }
885
52
        projected_constants
886
52
    }
887
888
    /// Projects the equivalences within according to `projection_mapping`
889
    /// and `output_schema`.
890
52
    pub fn project(
891
52
        &self,
892
52
        projection_mapping: &ProjectionMapping,
893
52
        output_schema: SchemaRef,
894
52
    ) -> Self {
895
52
        let projected_constants = self.projected_constants(projection_mapping);
896
52
        let projected_eq_group = self.eq_group.project(projection_mapping);
897
52
        let projected_orderings = self.projected_orderings(projection_mapping);
898
52
        Self {
899
52
            eq_group: projected_eq_group,
900
52
            oeq_class: OrderingEquivalenceClass::new(projected_orderings),
901
52
            constants: projected_constants,
902
52
            schema: output_schema,
903
52
        }
904
52
    }
905
906
    /// Returns the longest (potentially partial) permutation satisfying the
907
    /// existing ordering. For example, if we have the equivalent orderings
908
    /// `[a ASC, b ASC]` and `[c DESC]`, with `exprs` containing `[c, b, a, d]`,
909
    /// then this function returns `([a ASC, b ASC, c DESC], [2, 1, 0])`.
910
    /// This means that the specification `[a ASC, b ASC, c DESC]` is satisfied
911
    /// by the existing ordering, and `[a, b, c]` resides at indices: `2, 1, 0`
912
    /// inside the argument `exprs` (respectively). For the mathematical
913
    /// definition of "partial permutation", see:
914
    ///
915
    /// <https://en.wikipedia.org/wiki/Permutation#k-permutations_of_n>
916
200
    pub fn find_longest_permutation(
917
200
        &self,
918
200
        exprs: &[Arc<dyn PhysicalExpr>],
919
200
    ) -> (LexOrdering, Vec<usize>) {
920
200
        let mut eq_properties = self.clone();
921
200
        let mut result = vec![];
922
200
        // The algorithm is as follows:
923
200
        // - Iterate over all the expressions and insert ordered expressions
924
200
        //   into the result.
925
200
        // - Treat inserted expressions as constants (i.e. add them as constants
926
200
        //   to the state).
927
200
        // - Continue the above procedure until no expression is inserted; i.e.
928
200
        //   the algorithm reaches a fixed point.
929
200
        // This algorithm should reach a fixed point in at most `exprs.len()`
930
200
        // iterations.
931
200
        let mut search_indices = (0..exprs.len()).collect::<IndexSet<_>>();
932
234
        for _idx in 0..
exprs.len()200
{
933
            // Get ordered expressions with their indices.
934
234
            let ordered_exprs = search_indices
935
234
                .iter()
936
318
                .flat_map(|&idx| {
937
318
                    let ExprProperties {
938
318
                        sort_properties, ..
939
318
                    } = eq_properties.get_expr_properties(Arc::clone(&exprs[idx]));
940
318
                    match sort_properties {
941
76
                        SortProperties::Ordered(options) => Some((
942
76
                            PhysicalSortExpr {
943
76
                                expr: Arc::clone(&exprs[idx]),
944
76
                                options,
945
76
                            },
946
76
                            idx,
947
76
                        )),
948
                        SortProperties::Singleton => {
949
                            // Assign default ordering to constant expressions
950
2
                            let options = SortOptions::default();
951
2
                            Some((
952
2
                                PhysicalSortExpr {
953
2
                                    expr: Arc::clone(&exprs[idx]),
954
2
                                    options,
955
2
                                },
956
2
                                idx,
957
2
                            ))
958
                        }
959
240
                        SortProperties::Unordered => None,
960
                    }
961
318
                })
962
234
                .collect::<Vec<_>>();
963
234
            // We reached a fixed point, exit.
964
234
            if ordered_exprs.is_empty() {
965
156
                break;
966
78
            }
967
            // Remove indices that have an ordering from `search_indices`, and
968
            // treat ordered expressions as constants in subsequent iterations.
969
            // We can do this because the "next" key only matters in a lexicographical
970
            // ordering when the keys to its left have the same values.
971
            //
972
            // Note that these expressions are not properly "constants". This is just
973
            // an implementation strategy confined to this function.
974
156
            for (PhysicalSortExpr { 
expr, .. }, idx78
) in &ordered_exprs {
975
78
                eq_properties =
976
78
                    eq_properties.with_constants(std::iter::once(ConstExpr::from(expr)));
977
78
                search_indices.shift_remove(idx);
978
78
            }
979
            // Add new ordered section to the state.
980
78
            result.extend(ordered_exprs);
981
        }
982
200
        result.into_iter().unzip()
983
200
    }
984
985
    /// This function determines whether the provided expression is constant
986
    /// based on the known constants.
987
    ///
988
    /// # Arguments
989
    ///
990
    /// - `expr`: A reference to a `Arc<dyn PhysicalExpr>` representing the
991
    ///   expression to be checked.
992
    ///
993
    /// # Returns
994
    ///
995
    /// Returns `true` if the expression is constant according to equivalence
996
    /// group, `false` otherwise.
997
546
    pub fn is_expr_constant(&self, expr: &Arc<dyn PhysicalExpr>) -> bool {
998
546
        // As an example, assume that we know columns `a` and `b` are constant.
999
546
        // Then, `a`, `b` and `a + b` will all return `true` whereas `c` will
1000
546
        // return `false`.
1001
546
        let const_exprs = self
1002
546
            .constants
1003
546
            .iter()
1004
546
            .map(|const_expr| 
Arc::clone(const_expr.expr())190
);
1005
546
        let normalized_constants = self.eq_group.normalize_exprs(const_exprs);
1006
546
        let normalized_expr = self.eq_group.normalize_expr(Arc::clone(expr));
1007
546
        is_constant_recurse(&normalized_constants, &normalized_expr)
1008
546
    }
1009
1010
    /// Retrieves the properties for a given physical expression.
1011
    ///
1012
    /// This function constructs an [`ExprProperties`] object for the given
1013
    /// expression, which encapsulates information about the expression's
1014
    /// properties, including its [`SortProperties`] and [`Interval`].
1015
    ///
1016
    /// # Parameters
1017
    ///
1018
    /// - `expr`: An `Arc<dyn PhysicalExpr>` representing the physical expression
1019
    ///   for which ordering information is sought.
1020
    ///
1021
    /// # Returns
1022
    ///
1023
    /// Returns an [`ExprProperties`] object containing the ordering and range
1024
    /// information for the given expression.
1025
467
    pub fn get_expr_properties(&self, expr: Arc<dyn PhysicalExpr>) -> ExprProperties {
1026
467
        ExprPropertiesNode::new_unknown(expr)
1027
467
            .transform_up(|expr| update_properties(expr, self))
1028
467
            .data()
1029
467
            .map(|node| node.data)
1030
467
            .unwrap_or(ExprProperties::new_unknown())
1031
467
    }
1032
1033
    /// Transforms this `EquivalenceProperties` into a new `EquivalenceProperties`
1034
    /// by mapping columns in the original schema to columns in the new schema
1035
    /// by index.
1036
0
    pub fn with_new_schema(self, schema: SchemaRef) -> Result<Self> {
1037
        // The new schema and the original schema is aligned when they have the
1038
        // same number of columns, and fields at the same index have the same
1039
        // type in both schemas.
1040
0
        let schemas_aligned = (self.schema.fields.len() == schema.fields.len())
1041
0
            && self
1042
0
                .schema
1043
0
                .fields
1044
0
                .iter()
1045
0
                .zip(schema.fields.iter())
1046
0
                .all(|(lhs, rhs)| lhs.data_type().eq(rhs.data_type()));
1047
0
        if !schemas_aligned {
1048
            // Rewriting equivalence properties in terms of new schema is not
1049
            // safe when schemas are not aligned:
1050
0
            return plan_err!(
1051
0
                "Cannot rewrite old_schema:{:?} with new schema: {:?}",
1052
0
                self.schema,
1053
0
                schema
1054
0
            );
1055
0
        }
1056
        // Rewrite constants according to new schema:
1057
0
        let new_constants = self
1058
0
            .constants
1059
0
            .into_iter()
1060
0
            .map(|const_expr| {
1061
0
                let across_partitions = const_expr.across_partitions();
1062
0
                let new_const_expr = with_new_schema(const_expr.owned_expr(), &schema)?;
1063
0
                Ok(ConstExpr::new(new_const_expr)
1064
0
                    .with_across_partitions(across_partitions))
1065
0
            })
1066
0
            .collect::<Result<Vec<_>>>()?;
1067
1068
        // Rewrite orderings according to new schema:
1069
0
        let mut new_orderings = vec![];
1070
0
        for ordering in self.oeq_class.orderings {
1071
0
            let new_ordering = ordering
1072
0
                .into_iter()
1073
0
                .map(|mut sort_expr| {
1074
0
                    sort_expr.expr = with_new_schema(sort_expr.expr, &schema)?;
1075
0
                    Ok(sort_expr)
1076
0
                })
1077
0
                .collect::<Result<_>>()?;
1078
0
            new_orderings.push(new_ordering);
1079
        }
1080
1081
        // Rewrite equivalence classes according to the new schema:
1082
0
        let mut eq_classes = vec![];
1083
0
        for eq_class in self.eq_group.classes {
1084
0
            let new_eq_exprs = eq_class
1085
0
                .into_vec()
1086
0
                .into_iter()
1087
0
                .map(|expr| with_new_schema(expr, &schema))
1088
0
                .collect::<Result<_>>()?;
1089
0
            eq_classes.push(EquivalenceClass::new(new_eq_exprs));
1090
        }
1091
1092
        // Construct the resulting equivalence properties:
1093
0
        let mut result = EquivalenceProperties::new(schema);
1094
0
        result.constants = new_constants;
1095
0
        result.add_new_orderings(new_orderings);
1096
0
        result.add_equivalence_group(EquivalenceGroup::new(eq_classes));
1097
0
1098
0
        Ok(result)
1099
0
    }
1100
}
1101
1102
/// More readable display version of the `EquivalenceProperties`.
1103
///
1104
/// Format:
1105
/// ```text
1106
/// order: [[a ASC, b ASC], [a ASC, c ASC]], eq: [[a = b], [a = c]], const: [a = 1]
1107
/// ```
1108
impl Display for EquivalenceProperties {
1109
0
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1110
0
        if self.eq_group.is_empty()
1111
0
            && self.oeq_class.is_empty()
1112
0
            && self.constants.is_empty()
1113
        {
1114
0
            return write!(f, "No properties");
1115
0
        }
1116
0
        if !self.oeq_class.is_empty() {
1117
0
            write!(f, "order: {}", self.oeq_class)?;
1118
0
        }
1119
0
        if !self.eq_group.is_empty() {
1120
0
            write!(f, ", eq: {}", self.eq_group)?;
1121
0
        }
1122
0
        if !self.constants.is_empty() {
1123
0
            write!(f, ", const: [")?;
1124
0
            let mut iter = self.constants.iter();
1125
0
            if let Some(c) = iter.next() {
1126
0
                write!(f, "{}", c)?;
1127
0
            }
1128
0
            for c in iter {
1129
0
                write!(f, ", {}", c)?;
1130
            }
1131
0
            write!(f, "]")?;
1132
0
        }
1133
0
        Ok(())
1134
0
    }
1135
}
1136
1137
/// Calculates the properties of a given [`ExprPropertiesNode`].
1138
///
1139
/// Order information can be retrieved as:
1140
/// - If it is a leaf node, we directly find the order of the node by looking
1141
///   at the given sort expression and equivalence properties if it is a `Column`
1142
///   leaf, or we mark it as unordered. In the case of a `Literal` leaf, we mark
1143
///   it as singleton so that it can cooperate with all ordered columns.
1144
/// - If it is an intermediate node, the children states matter. Each `PhysicalExpr`
1145
///   and operator has its own rules on how to propagate the children orderings.
1146
///   However, before we engage in recursion, we check whether this intermediate
1147
///   node directly matches with the sort expression. If there is a match, the
1148
///   sort expression emerges at that node immediately, discarding the recursive
1149
///   result coming from its children.
1150
///
1151
/// Range information is calculated as:
1152
/// - If it is a `Literal` node, we set the range as a point value. If it is a
1153
///   `Column` node, we set the datatype of the range, but cannot give an interval
1154
///   for the range, yet.
1155
/// - If it is an intermediate node, the children states matter. Each `PhysicalExpr`
1156
///   and operator has its own rules on how to propagate the children range.
1157
467
fn update_properties(
1158
467
    mut node: ExprPropertiesNode,
1159
467
    eq_properties: &EquivalenceProperties,
1160
467
) -> Result<Transformed<ExprPropertiesNode>> {
1161
467
    // First, try to gather the information from the children:
1162
467
    if !node.expr.children().is_empty() {
1163
        // We have an intermediate (non-leaf) node, account for its children:
1164
0
        let children_props = node.children.iter().map(|c| c.data.clone()).collect_vec();
1165
0
        node.data = node.expr.get_properties(&children_props)?;
1166
467
    } else if node.expr.as_any().is::<Literal>() {
1167
        // We have a Literal, which is one of the two possible leaf node types:
1168
1
        node.data = node.expr.get_properties(&[])
?0
;
1169
466
    } else if node.expr.as_any().is::<Column>() {
1170
        // We have a Column, which is the other possible leaf node type:
1171
466
        node.data.range =
1172
466
            Interval::make_unbounded(&node.expr.data_type(eq_properties.schema())
?0
)
?0
1173
0
    }
1174
    // Now, check what we know about orderings:
1175
467
    let normalized_expr = eq_properties
1176
467
        .eq_group
1177
467
        .normalize_expr(Arc::clone(&node.expr));
1178
467
    if eq_properties.is_expr_constant(&normalized_expr) {
1179
2
        node.data.sort_properties = SortProperties::Singleton;
1180
465
    } else if let Some(
options150
) = eq_properties
1181
465
        .normalized_oeq_class()
1182
465
        .get_options(&normalized_expr)
1183
150
    {
1184
150
        node.data.sort_properties = SortProperties::Ordered(options);
1185
315
    }
1186
467
    Ok(Transformed::yes(node))
1187
467
}
1188
1189
/// This function determines whether the provided expression is constant
1190
/// based on the known constants.
1191
///
1192
/// # Arguments
1193
///
1194
/// - `constants`: A `&[Arc<dyn PhysicalExpr>]` containing expressions known to
1195
///   be a constant.
1196
/// - `expr`: A reference to a `Arc<dyn PhysicalExpr>` representing the expression
1197
///   to check.
1198
///
1199
/// # Returns
1200
///
1201
/// Returns `true` if the expression is constant according to equivalence
1202
/// group, `false` otherwise.
1203
546
fn is_constant_recurse(
1204
546
    constants: &[Arc<dyn PhysicalExpr>],
1205
546
    expr: &Arc<dyn PhysicalExpr>,
1206
546
) -> bool {
1207
546
    if physical_exprs_contains(constants, expr) || 
expr.as_any().is::<Literal>()545
{
1208
11
        return true;
1209
535
    }
1210
535
    let children = expr.children();
1211
535
    !children.is_empty() && 
children.iter().all(0
|c|
is_constant_recurse(constants, c)0
)
1212
546
}
1213
1214
/// This function examines whether a referring expression directly refers to a
1215
/// given referred expression or if any of its children in the expression tree
1216
/// refer to the specified expression.
1217
///
1218
/// # Parameters
1219
///
1220
/// - `referring_expr`: A reference to the referring expression (`Arc<dyn PhysicalExpr>`).
1221
/// - `referred_expr`: A reference to the referred expression (`Arc<dyn PhysicalExpr>`)
1222
///
1223
/// # Returns
1224
///
1225
/// A boolean value indicating whether `referring_expr` refers (needs it to evaluate its result)
1226
/// `referred_expr` or not.
1227
6
fn expr_refers(
1228
6
    referring_expr: &Arc<dyn PhysicalExpr>,
1229
6
    referred_expr: &Arc<dyn PhysicalExpr>,
1230
6
) -> bool {
1231
6
    referring_expr.eq(referred_expr)
1232
4
        || referring_expr
1233
4
            .children()
1234
4
            .iter()
1235
4
            .any(|child| 
expr_refers(child, referred_expr)0
)2
1236
6
}
1237
1238
/// This function analyzes the dependency map to collect referred dependencies for
1239
/// a given source expression.
1240
///
1241
/// # Parameters
1242
///
1243
/// - `dependency_map`: A reference to the `DependencyMap` where each
1244
///   `PhysicalSortExpr` is associated with a `DependencyNode`.
1245
/// - `source`: A reference to the source expression (`Arc<dyn PhysicalExpr>`)
1246
///   for which relevant dependencies need to be identified.
1247
///
1248
/// # Returns
1249
///
1250
/// A `Vec<Dependencies>` containing the dependencies for the given source
1251
/// expression. These dependencies are expressions that are referred to by
1252
/// the source expression based on the provided dependency map.
1253
61
fn referred_dependencies(
1254
61
    dependency_map: &DependencyMap,
1255
61
    source: &Arc<dyn PhysicalExpr>,
1256
61
) -> Vec<Dependencies> {
1257
61
    // Associate `PhysicalExpr`s with `PhysicalSortExpr`s that contain them:
1258
61
    let mut expr_to_sort_exprs = IndexMap::<ExprWrapper, Dependencies>::new();
1259
61
    for 
sort_expr1
in dependency_map
1260
61
        .keys()
1261
61
        .filter(|sort_expr| 
expr_refers(source, &sort_expr.expr)3
)
1262
1
    {
1263
1
        let key = ExprWrapper(Arc::clone(&sort_expr.expr));
1264
1
        expr_to_sort_exprs
1265
1
            .entry(key)
1266
1
            .or_default()
1267
1
            .insert(sort_expr.clone());
1268
1
    }
1269
1270
    // Generate all valid dependencies for the source. For example, if the source
1271
    // is `a + b` and the map is `[a -> (a ASC, a DESC), b -> (b ASC)]`, we get
1272
    // `vec![HashSet(a ASC, b ASC), HashSet(a DESC, b ASC)]`.
1273
61
    expr_to_sort_exprs
1274
61
        .values()
1275
61
        .multi_cartesian_product()
1276
61
        .map(|referred_deps| referred_deps.into_iter().cloned().collect())
1277
61
        .collect()
1278
61
}
1279
1280
/// This function retrieves the dependencies of the given relevant sort expression
1281
/// from the given dependency map. It then constructs prefix orderings by recursively
1282
/// analyzing the dependencies and include them in the orderings.
1283
///
1284
/// # Parameters
1285
///
1286
/// - `relevant_sort_expr`: A reference to the relevant sort expression
1287
///   (`PhysicalSortExpr`) for which prefix orderings are to be constructed.
1288
/// - `dependency_map`: A reference to the `DependencyMap` containing dependencies.
1289
///
1290
/// # Returns
1291
///
1292
/// A vector of prefix orderings (`Vec<LexOrdering>`) based on the given relevant
1293
/// sort expression and its dependencies.
1294
2
fn construct_prefix_orderings(
1295
2
    relevant_sort_expr: &PhysicalSortExpr,
1296
2
    dependency_map: &DependencyMap,
1297
2
) -> Vec<LexOrdering> {
1298
2
    let mut dep_enumerator = DependencyEnumerator::new();
1299
2
    dependency_map[relevant_sort_expr]
1300
2
        .dependencies
1301
2
        .iter()
1302
2
        .flat_map(|dep| 
dep_enumerator.construct_orderings(dep, dependency_map)0
)
1303
2
        .collect()
1304
2
}
1305
1306
/// Generates all possible orderings where dependencies are satisfied for the
1307
/// current projection expression.
1308
///
1309
/// # Examaple
1310
///  If `dependences` is `a + b ASC` and the dependency map holds dependencies
1311
///  * `a ASC` --> `[c ASC]`
1312
///  * `b ASC` --> `[d DESC]`,
1313
///
1314
/// This function generates these two sort orders
1315
/// * `[c ASC, d DESC, a + b ASC]`
1316
/// * `[d DESC, c ASC, a + b ASC]`
1317
///
1318
/// # Parameters
1319
///
1320
/// * `dependencies` - Set of relevant expressions.
1321
/// * `dependency_map` - Map of dependencies for expressions that may appear in `dependencies`
1322
///
1323
/// # Returns
1324
///
1325
/// A vector of lexical orderings (`Vec<LexOrdering>`) representing all valid orderings
1326
/// based on the given dependencies.
1327
1
fn generate_dependency_orderings(
1328
1
    dependencies: &Dependencies,
1329
1
    dependency_map: &DependencyMap,
1330
1
) -> Vec<LexOrdering> {
1331
1
    // Construct all the valid prefix orderings for each expression appearing
1332
1
    // in the projection:
1333
1
    let relevant_prefixes = dependencies
1334
1
        .iter()
1335
1
        .flat_map(|dep| {
1336
1
            let prefixes = construct_prefix_orderings(dep, dependency_map);
1337
1
            (!prefixes.is_empty()).then_some(prefixes)
1338
1
        })
1339
1
        .collect::<Vec<_>>();
1340
1
1341
1
    // No dependency, dependent is a leading ordering.
1342
1
    if relevant_prefixes.is_empty() {
1343
        // Return an empty ordering:
1344
1
        return vec![vec![]];
1345
0
    }
1346
0
1347
0
    relevant_prefixes
1348
0
        .into_iter()
1349
0
        .multi_cartesian_product()
1350
0
        .flat_map(|prefix_orderings| {
1351
0
            prefix_orderings
1352
0
                .iter()
1353
0
                .permutations(prefix_orderings.len())
1354
0
                .map(|prefixes| prefixes.into_iter().flatten().cloned().collect())
1355
0
                .collect::<Vec<_>>()
1356
0
        })
1357
0
        .collect()
1358
1
}
1359
1360
/// This function examines the given expression and its properties to determine
1361
/// the ordering properties of the expression. The range knowledge is not utilized
1362
/// yet in the scope of this function.
1363
///
1364
/// # Parameters
1365
///
1366
/// - `expr`: A reference to the source expression (`Arc<dyn PhysicalExpr>`) for
1367
///   which ordering properties need to be determined.
1368
/// - `dependencies`: A reference to `Dependencies`, containing sort expressions
1369
///   referred to by `expr`.
1370
/// - `schema`: A reference to the schema to which the `expr` columns refer.
1371
///
1372
/// # Returns
1373
///
1374
/// An `ExprProperties` holding the sort properties and range of the given expression.
1375
61
fn get_expr_properties(
1376
61
    expr: &Arc<dyn PhysicalExpr>,
1377
61
    dependencies: &Dependencies,
1378
61
    schema: &SchemaRef,
1379
61
) -> Result<ExprProperties> {
1380
61
    if let Some(
column_order1
) = dependencies.iter().find(|&order|
expr.eq(&order.expr)1
) {
1381
        // If exact match is found, return its ordering.
1382
        Ok(ExprProperties {
1383
1
            sort_properties: SortProperties::Ordered(column_order.options),
1384
1
            range: Interval::make_unbounded(&expr.data_type(schema)
?0
)
?0
,
1385
        })
1386
60
    } else if expr.as_any().downcast_ref::<Column>().is_some() {
1387
        Ok(ExprProperties {
1388
59
            sort_properties: SortProperties::Unordered,
1389
59
            range: Interval::make_unbounded(&expr.data_type(schema)
?0
)
?0
,
1390
        })
1391
1
    } else if let Some(literal) = expr.as_any().downcast_ref::<Literal>() {
1392
        Ok(ExprProperties {
1393
1
            sort_properties: SortProperties::Singleton,
1394
1
            range: Interval::try_new(literal.value().clone(), literal.value().clone())
?0
,
1395
        })
1396
    } else {
1397
        // Find orderings of its children
1398
0
        let child_states = expr
1399
0
            .children()
1400
0
            .iter()
1401
0
            .map(|child| get_expr_properties(child, dependencies, schema))
1402
0
            .collect::<Result<Vec<_>>>()?;
1403
        // Calculate expression ordering using ordering of its children.
1404
0
        expr.get_properties(&child_states)
1405
    }
1406
61
}
1407
1408
/// Represents a node in the dependency map used to construct projected orderings.
1409
///
1410
/// A `DependencyNode` contains information about a particular sort expression,
1411
/// including its target sort expression and a set of dependencies on other sort
1412
/// expressions.
1413
///
1414
/// # Fields
1415
///
1416
/// - `target_sort_expr`: An optional `PhysicalSortExpr` representing the target
1417
///   sort expression associated with the node. It is `None` if the sort expression
1418
///   cannot be projected.
1419
/// - `dependencies`: A [`Dependencies`] containing dependencies on other sort
1420
///   expressions that are referred to by the target sort expression.
1421
#[derive(Debug, Clone, PartialEq, Eq)]
1422
struct DependencyNode {
1423
    target_sort_expr: Option<PhysicalSortExpr>,
1424
    dependencies: Dependencies,
1425
}
1426
1427
impl DependencyNode {
1428
    /// Insert dependency to the state (if exists).
1429
1
    fn insert_dependency(&mut self, dependency: Option<&PhysicalSortExpr>) {
1430
1
        if let Some(
dep0
) = dependency {
1431
0
            self.dependencies.insert(dep.clone());
1432
1
        }
1433
1
    }
1434
}
1435
1436
// Using `IndexMap` and `IndexSet` makes sure to generate consistent results across different executions for the same query.
1437
// We could have used `HashSet`, `HashMap` in place of them without any loss of functionality.
1438
// As an example, if existing orderings are `[a ASC, b ASC]`, `[c ASC]` for output ordering
1439
// both `[a ASC, b ASC, c ASC]` and `[c ASC, a ASC, b ASC]` are valid (e.g. concatenated version of the alternative orderings).
1440
// When using `HashSet`, `HashMap` it is not guaranteed to generate consistent result, among the possible 2 results in the example above.
1441
type DependencyMap = IndexMap<PhysicalSortExpr, DependencyNode>;
1442
type Dependencies = IndexSet<PhysicalSortExpr>;
1443
1444
/// Contains a mapping of all dependencies we have processed for each sort expr
1445
struct DependencyEnumerator<'a> {
1446
    /// Maps `expr` --> `[exprs]` that have previously been processed
1447
    seen: IndexMap<&'a PhysicalSortExpr, IndexSet<&'a PhysicalSortExpr>>,
1448
}
1449
1450
impl<'a> DependencyEnumerator<'a> {
1451
2
    fn new() -> Self {
1452
2
        Self {
1453
2
            seen: IndexMap::new(),
1454
2
        }
1455
2
    }
1456
1457
    /// Insert a new dependency,
1458
    ///
1459
    /// returns false if the dependency was already in the map
1460
    /// returns true if the dependency was newly inserted
1461
0
    fn insert(
1462
0
        &mut self,
1463
0
        target: &'a PhysicalSortExpr,
1464
0
        dep: &'a PhysicalSortExpr,
1465
0
    ) -> bool {
1466
0
        self.seen.entry(target).or_default().insert(dep)
1467
0
    }
1468
1469
    /// This function recursively analyzes the dependencies of the given sort
1470
    /// expression within the given dependency map to construct lexicographical
1471
    /// orderings that include the sort expression and its dependencies.
1472
    ///
1473
    /// # Parameters
1474
    ///
1475
    /// - `referred_sort_expr`: A reference to the sort expression (`PhysicalSortExpr`)
1476
    ///   for which lexicographical orderings satisfying its dependencies are to be
1477
    ///   constructed.
1478
    /// - `dependency_map`: A reference to the `DependencyMap` that contains
1479
    ///   dependencies for different `PhysicalSortExpr`s.
1480
    ///
1481
    /// # Returns
1482
    ///
1483
    /// A vector of lexicographical orderings (`Vec<LexOrdering>`) based on the given
1484
    /// sort expression and its dependencies.
1485
0
    fn construct_orderings(
1486
0
        &mut self,
1487
0
        referred_sort_expr: &'a PhysicalSortExpr,
1488
0
        dependency_map: &'a DependencyMap,
1489
0
    ) -> Vec<LexOrdering> {
1490
0
        // We are sure that `referred_sort_expr` is inside `dependency_map`.
1491
0
        let node = &dependency_map[referred_sort_expr];
1492
0
        // Since we work on intermediate nodes, we are sure `node.target_sort_expr`
1493
0
        // exists.
1494
0
        let target_sort_expr = node.target_sort_expr.as_ref().unwrap();
1495
0
        if node.dependencies.is_empty() {
1496
0
            return vec![vec![target_sort_expr.clone()]];
1497
0
        };
1498
0
1499
0
        node.dependencies
1500
0
            .iter()
1501
0
            .flat_map(|dep| {
1502
0
                let mut orderings = if self.insert(target_sort_expr, dep) {
1503
0
                    self.construct_orderings(dep, dependency_map)
1504
                } else {
1505
0
                    vec![]
1506
                };
1507
0
                for ordering in orderings.iter_mut() {
1508
0
                    ordering.push(target_sort_expr.clone())
1509
                }
1510
0
                orderings
1511
0
            })
1512
0
            .collect()
1513
0
    }
1514
}
1515
1516
/// Calculate ordering equivalence properties for the given join operation.
1517
1.14k
pub fn join_equivalence_properties(
1518
1.14k
    left: EquivalenceProperties,
1519
1.14k
    right: EquivalenceProperties,
1520
1.14k
    join_type: &JoinType,
1521
1.14k
    join_schema: SchemaRef,
1522
1.14k
    maintains_input_order: &[bool],
1523
1.14k
    probe_side: Option<JoinSide>,
1524
1.14k
    on: &[(PhysicalExprRef, PhysicalExprRef)],
1525
1.14k
) -> EquivalenceProperties {
1526
1.14k
    let left_size = left.schema.fields.len();
1527
1.14k
    let mut result = EquivalenceProperties::new(join_schema);
1528
1.14k
    result.add_equivalence_group(left.eq_group().join(
1529
1.14k
        right.eq_group(),
1530
1.14k
        join_type,
1531
1.14k
        left_size,
1532
1.14k
        on,
1533
1.14k
    ));
1534
1.14k
1535
1.14k
    let EquivalenceProperties {
1536
1.14k
        constants: left_constants,
1537
1.14k
        oeq_class: left_oeq_class,
1538
1.14k
        ..
1539
1.14k
    } = left;
1540
1.14k
    let EquivalenceProperties {
1541
1.14k
        constants: right_constants,
1542
1.14k
        oeq_class: mut right_oeq_class,
1543
1.14k
        ..
1544
1.14k
    } = right;
1545
1.14k
    match maintains_input_order {
1546
1.14k
        [true, false] => {
1547
            // In this special case, right side ordering can be prefixed with
1548
            // the left side ordering.
1549
54
            if let (Some(JoinSide::Left), JoinType::Inner) = (probe_side, join_type) {
1550
19
                updated_right_ordering_equivalence_class(
1551
19
                    &mut right_oeq_class,
1552
19
                    join_type,
1553
19
                    left_size,
1554
19
                );
1555
19
1556
19
                // Right side ordering equivalence properties should be prepended
1557
19
                // with those of the left side while constructing output ordering
1558
19
                // equivalence properties since stream side is the left side.
1559
19
                //
1560
19
                // For example, if the right side ordering equivalences contain
1561
19
                // `b ASC`, and the left side ordering equivalences contain `a ASC`,
1562
19
                // then we should add `a ASC, b ASC` to the ordering equivalences
1563
19
                // of the join output.
1564
19
                let out_oeq_class = left_oeq_class.join_suffix(&right_oeq_class);
1565
19
                result.add_ordering_equivalence_class(out_oeq_class);
1566
35
            } else {
1567
35
                result.add_ordering_equivalence_class(left_oeq_class);
1568
35
            }
1569
        }
1570
        [false, true] => {
1571
400
            updated_right_ordering_equivalence_class(
1572
400
                &mut right_oeq_class,
1573
400
                join_type,
1574
400
                left_size,
1575
400
            );
1576
400
            // In this special case, left side ordering can be prefixed with
1577
400
            // the right side ordering.
1578
400
            if let (Some(JoinSide::Right), JoinType::Inner) = (probe_side, join_type) {
1579
103
                // Left side ordering equivalence properties should be prepended
1580
103
                // with those of the right side while constructing output ordering
1581
103
                // equivalence properties since stream side is the right side.
1582
103
                //
1583
103
                // For example, if the left side ordering equivalences contain
1584
103
                // `a ASC`, and the right side ordering equivalences contain `b ASC`,
1585
103
                // then we should add `b ASC, a ASC` to the ordering equivalences
1586
103
                // of the join output.
1587
103
                let out_oeq_class = right_oeq_class.join_suffix(&left_oeq_class);
1588
103
                result.add_ordering_equivalence_class(out_oeq_class);
1589
297
            } else {
1590
297
                result.add_ordering_equivalence_class(right_oeq_class);
1591
297
            }
1592
        }
1593
695
        [false, false] => {}
1594
0
        [true, true] => unreachable!("Cannot maintain ordering of both sides"),
1595
0
        _ => unreachable!("Join operators can not have more than two children"),
1596
    }
1597
1.14k
    match join_type {
1598
268
        JoinType::LeftAnti | JoinType::LeftSemi => {
1599
268
            result = result.with_constants(left_constants);
1600
268
        }
1601
264
        JoinType::RightAnti | JoinType::RightSemi => {
1602
264
            result = result.with_constants(right_constants);
1603
264
        }
1604
617
        _ => {}
1605
    }
1606
1.14k
    result
1607
1.14k
}
1608
1609
/// In the context of a join, update the right side `OrderingEquivalenceClass`
1610
/// so that they point to valid indices in the join output schema.
1611
///
1612
/// To do so, we increment column indices by the size of the left table when
1613
/// join schema consists of a combination of the left and right schemas. This
1614
/// is the case for `Inner`, `Left`, `Full` and `Right` joins. For other cases,
1615
/// indices do not change.
1616
419
fn updated_right_ordering_equivalence_class(
1617
419
    right_oeq_class: &mut OrderingEquivalenceClass,
1618
419
    join_type: &JoinType,
1619
419
    left_size: usize,
1620
419
) {
1621
182
    if matches!(
1622
419
        join_type,
1623
        JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right
1624
237
    ) {
1625
237
        right_oeq_class.add_offset(left_size);
1626
237
    }
182
1627
419
}
1628
1629
/// Wrapper struct for `Arc<dyn PhysicalExpr>` to use them as keys in a hash map.
1630
#[derive(Debug, Clone)]
1631
struct ExprWrapper(Arc<dyn PhysicalExpr>);
1632
1633
impl PartialEq<Self> for ExprWrapper {
1634
0
    fn eq(&self, other: &Self) -> bool {
1635
0
        self.0.eq(&other.0)
1636
0
    }
1637
}
1638
1639
impl Eq for ExprWrapper {}
1640
1641
impl Hash for ExprWrapper {
1642
1
    fn hash<H: Hasher>(&self, state: &mut H) {
1643
1
        self.0.hash(state);
1644
1
    }
1645
}
1646
1647
/// Calculates the union (in the sense of `UnionExec`) `EquivalenceProperties`
1648
/// of `lhs` and `rhs` according to the schema of `lhs`.
1649
5
fn calculate_union_binary(
1650
5
    lhs: EquivalenceProperties,
1651
5
    mut rhs: EquivalenceProperties,
1652
5
) -> Result<EquivalenceProperties> {
1653
5
    // TODO: In some cases, we should be able to preserve some equivalence
1654
5
    //       classes. Add support for such cases.
1655
5
1656
5
    // Harmonize the schema of the rhs with the schema of the lhs (which is the accumulator schema):
1657
5
    if !rhs.schema.eq(&lhs.schema) {
1658
0
        rhs = rhs.with_new_schema(Arc::clone(&lhs.schema))?;
1659
5
    }
1660
1661
    // First, calculate valid constants for the union. A quantity is constant
1662
    // after the union if it is constant in both sides.
1663
5
    let constants = lhs
1664
5
        .constants()
1665
5
        .iter()
1666
5
        .filter(|const_expr| 
const_exprs_contains(rhs.constants(), const_expr.expr())0
)
1667
5
        .map(|const_expr| {
1668
0
            // TODO: When both sides' constants are valid across partitions,
1669
0
            //       the union's constant should also be valid if values are
1670
0
            //       the same. However, we do not have the capability to
1671
0
            //       check this yet.
1672
0
            ConstExpr::new(Arc::clone(const_expr.expr())).with_across_partitions(false)
1673
5
        })
1674
5
        .collect();
1675
5
1676
5
    // Next, calculate valid orderings for the union by searching for prefixes
1677
5
    // in both sides.
1678
5
    let mut orderings = vec![];
1679
5
    for 
mut ordering4
in lhs.normalized_oeq_class().orderings {
1680
        // Progressively shorten the ordering to search for a satisfied prefix:
1681
6
        while !rhs.ordering_satisfy(&ordering) {
1682
2
            ordering.pop();
1683
2
        }
1684
        // There is a non-trivial satisfied prefix, add it as a valid ordering:
1685
4
        if !ordering.is_empty() {
1686
3
            orderings.push(ordering);
1687
3
        }
1
1688
    }
1689
5
    for mut ordering in rhs.normalized_oeq_class().orderings {
1690
        // Progressively shorten the ordering to search for a satisfied prefix:
1691
8
        while !lhs.ordering_satisfy(&ordering) {
1692
3
            ordering.pop();
1693
3
        }
1694
        // There is a non-trivial satisfied prefix, add it as a valid ordering:
1695
5
        if !ordering.is_empty() {
1696
4
            orderings.push(ordering);
1697
4
        }
1
1698
    }
1699
5
    let mut eq_properties = EquivalenceProperties::new(lhs.schema);
1700
5
    eq_properties.constants = constants;
1701
5
    eq_properties.add_new_orderings(orderings);
1702
5
    Ok(eq_properties)
1703
5
}
1704
1705
/// Calculates the union (in the sense of `UnionExec`) `EquivalenceProperties`
1706
/// of the given `EquivalenceProperties` in `eqps` according to the given
1707
/// output `schema` (which need not be the same as those of the inputs,
1708
/// as details such as nullability may be different).
1709
5
pub fn calculate_union(
1710
5
    eqps: Vec<EquivalenceProperties>,
1711
5
    schema: SchemaRef,
1712
5
) -> Result<EquivalenceProperties> {
1713
5
    // TODO: In some cases, we should be able to preserve some equivalence
1714
5
    //       classes. Add support for such cases.
1715
5
    let mut iter = eqps.into_iter();
1716
5
    let Some(mut acc) = iter.next() else {
1717
0
        return internal_err!(
1718
0
            "Cannot calculate EquivalenceProperties for a union with no inputs"
1719
0
        );
1720
    };
1721
1722
    // Harmonize the schema of the init with the schema of the union:
1723
5
    if !acc.schema.eq(&schema) {
1724
0
        acc = acc.with_new_schema(schema)?;
1725
5
    }
1726
    // Fold in the rest of the EquivalenceProperties:
1727
10
    for 
props5
in iter {
1728
5
        acc = calculate_union_binary(acc, props)
?0
;
1729
    }
1730
5
    Ok(acc)
1731
5
}
1732
1733
#[cfg(test)]
1734
mod tests {
1735
    use std::ops::Not;
1736
1737
    use super::*;
1738
    use crate::equivalence::add_offset_to_expr;
1739
    use crate::equivalence::tests::{
1740
        convert_to_orderings, convert_to_sort_exprs, convert_to_sort_reqs,
1741
        create_random_schema, create_test_params, create_test_schema,
1742
        generate_table_for_eq_properties, is_table_same_after_sort, output_schema,
1743
    };
1744
    use crate::expressions::{col, BinaryExpr, Column};
1745
    use crate::utils::tests::TestScalarUDF;
1746
1747
    use arrow::datatypes::{DataType, Field, Schema};
1748
    use arrow_schema::{Fields, TimeUnit};
1749
    use datafusion_common::DFSchema;
1750
    use datafusion_expr::{Operator, ScalarUDF};
1751
1752
    #[test]
1753
    fn project_equivalence_properties_test() -> Result<()> {
1754
        let input_schema = Arc::new(Schema::new(vec![
1755
            Field::new("a", DataType::Int64, true),
1756
            Field::new("b", DataType::Int64, true),
1757
            Field::new("c", DataType::Int64, true),
1758
        ]));
1759
1760
        let input_properties = EquivalenceProperties::new(Arc::clone(&input_schema));
1761
        let col_a = col("a", &input_schema)?;
1762
1763
        // a as a1, a as a2, a as a3, a as a4
1764
        let proj_exprs = vec![
1765
            (Arc::clone(&col_a), "a1".to_string()),
1766
            (Arc::clone(&col_a), "a2".to_string()),
1767
            (Arc::clone(&col_a), "a3".to_string()),
1768
            (Arc::clone(&col_a), "a4".to_string()),
1769
        ];
1770
        let projection_mapping = ProjectionMapping::try_new(&proj_exprs, &input_schema)?;
1771
1772
        let out_schema = output_schema(&projection_mapping, &input_schema)?;
1773
        // a as a1, a as a2, a as a3, a as a4
1774
        let proj_exprs = vec![
1775
            (Arc::clone(&col_a), "a1".to_string()),
1776
            (Arc::clone(&col_a), "a2".to_string()),
1777
            (Arc::clone(&col_a), "a3".to_string()),
1778
            (Arc::clone(&col_a), "a4".to_string()),
1779
        ];
1780
        let projection_mapping = ProjectionMapping::try_new(&proj_exprs, &input_schema)?;
1781
1782
        // a as a1, a as a2, a as a3, a as a4
1783
        let col_a1 = &col("a1", &out_schema)?;
1784
        let col_a2 = &col("a2", &out_schema)?;
1785
        let col_a3 = &col("a3", &out_schema)?;
1786
        let col_a4 = &col("a4", &out_schema)?;
1787
        let out_properties = input_properties.project(&projection_mapping, out_schema);
1788
1789
        // At the output a1=a2=a3=a4
1790
        assert_eq!(out_properties.eq_group().len(), 1);
1791
        let eq_class = &out_properties.eq_group().classes[0];
1792
        assert_eq!(eq_class.len(), 4);
1793
        assert!(eq_class.contains(col_a1));
1794
        assert!(eq_class.contains(col_a2));
1795
        assert!(eq_class.contains(col_a3));
1796
        assert!(eq_class.contains(col_a4));
1797
1798
        Ok(())
1799
    }
1800
1801
    #[test]
1802
    fn project_equivalence_properties_test_multi() -> Result<()> {
1803
        // test multiple input orderings with equivalence properties
1804
        let input_schema = Arc::new(Schema::new(vec![
1805
            Field::new("a", DataType::Int64, true),
1806
            Field::new("b", DataType::Int64, true),
1807
            Field::new("c", DataType::Int64, true),
1808
            Field::new("d", DataType::Int64, true),
1809
        ]));
1810
1811
        let mut input_properties = EquivalenceProperties::new(Arc::clone(&input_schema));
1812
        // add equivalent ordering [a, b, c, d]
1813
        input_properties.add_new_ordering(vec![
1814
            parse_sort_expr("a", &input_schema),
1815
            parse_sort_expr("b", &input_schema),
1816
            parse_sort_expr("c", &input_schema),
1817
            parse_sort_expr("d", &input_schema),
1818
        ]);
1819
1820
        // add equivalent ordering [a, c, b, d]
1821
        input_properties.add_new_ordering(vec![
1822
            parse_sort_expr("a", &input_schema),
1823
            parse_sort_expr("c", &input_schema),
1824
            parse_sort_expr("b", &input_schema), // NB b and c are swapped
1825
            parse_sort_expr("d", &input_schema),
1826
        ]);
1827
1828
        // simply project all the columns in order
1829
        let proj_exprs = vec![
1830
            (col("a", &input_schema)?, "a".to_string()),
1831
            (col("b", &input_schema)?, "b".to_string()),
1832
            (col("c", &input_schema)?, "c".to_string()),
1833
            (col("d", &input_schema)?, "d".to_string()),
1834
        ];
1835
        let projection_mapping = ProjectionMapping::try_new(&proj_exprs, &input_schema)?;
1836
        let out_properties = input_properties.project(&projection_mapping, input_schema);
1837
1838
        assert_eq!(
1839
            out_properties.to_string(),
1840
            "order: [[a@0 ASC,c@2 ASC,b@1 ASC,d@3 ASC], [a@0 ASC,b@1 ASC,c@2 ASC,d@3 ASC]]"
1841
        );
1842
1843
        Ok(())
1844
    }
1845
1846
    #[test]
1847
    fn test_join_equivalence_properties() -> Result<()> {
1848
        let schema = create_test_schema()?;
1849
        let col_a = &col("a", &schema)?;
1850
        let col_b = &col("b", &schema)?;
1851
        let col_c = &col("c", &schema)?;
1852
        let offset = schema.fields.len();
1853
        let col_a2 = &add_offset_to_expr(Arc::clone(col_a), offset);
1854
        let col_b2 = &add_offset_to_expr(Arc::clone(col_b), offset);
1855
        let option_asc = SortOptions {
1856
            descending: false,
1857
            nulls_first: false,
1858
        };
1859
        let test_cases = vec![
1860
            // ------- TEST CASE 1 --------
1861
            // [a ASC], [b ASC]
1862
            (
1863
                // [a ASC], [b ASC]
1864
                vec![vec![(col_a, option_asc)], vec![(col_b, option_asc)]],
1865
                // [a ASC], [b ASC]
1866
                vec![vec![(col_a, option_asc)], vec![(col_b, option_asc)]],
1867
                // expected [a ASC, a2 ASC], [a ASC, b2 ASC], [b ASC, a2 ASC], [b ASC, b2 ASC]
1868
                vec![
1869
                    vec![(col_a, option_asc), (col_a2, option_asc)],
1870
                    vec![(col_a, option_asc), (col_b2, option_asc)],
1871
                    vec![(col_b, option_asc), (col_a2, option_asc)],
1872
                    vec![(col_b, option_asc), (col_b2, option_asc)],
1873
                ],
1874
            ),
1875
            // ------- TEST CASE 2 --------
1876
            // [a ASC], [b ASC]
1877
            (
1878
                // [a ASC], [b ASC], [c ASC]
1879
                vec![
1880
                    vec![(col_a, option_asc)],
1881
                    vec![(col_b, option_asc)],
1882
                    vec![(col_c, option_asc)],
1883
                ],
1884
                // [a ASC], [b ASC]
1885
                vec![vec![(col_a, option_asc)], vec![(col_b, option_asc)]],
1886
                // expected [a ASC, a2 ASC], [a ASC, b2 ASC], [b ASC, a2 ASC], [b ASC, b2 ASC], [c ASC, a2 ASC], [c ASC, b2 ASC]
1887
                vec![
1888
                    vec![(col_a, option_asc), (col_a2, option_asc)],
1889
                    vec![(col_a, option_asc), (col_b2, option_asc)],
1890
                    vec![(col_b, option_asc), (col_a2, option_asc)],
1891
                    vec![(col_b, option_asc), (col_b2, option_asc)],
1892
                    vec![(col_c, option_asc), (col_a2, option_asc)],
1893
                    vec![(col_c, option_asc), (col_b2, option_asc)],
1894
                ],
1895
            ),
1896
        ];
1897
        for (left_orderings, right_orderings, expected) in test_cases {
1898
            let mut left_eq_properties = EquivalenceProperties::new(Arc::clone(&schema));
1899
            let mut right_eq_properties = EquivalenceProperties::new(Arc::clone(&schema));
1900
            let left_orderings = convert_to_orderings(&left_orderings);
1901
            let right_orderings = convert_to_orderings(&right_orderings);
1902
            let expected = convert_to_orderings(&expected);
1903
            left_eq_properties.add_new_orderings(left_orderings);
1904
            right_eq_properties.add_new_orderings(right_orderings);
1905
            let join_eq = join_equivalence_properties(
1906
                left_eq_properties,
1907
                right_eq_properties,
1908
                &JoinType::Inner,
1909
                Arc::new(Schema::empty()),
1910
                &[true, false],
1911
                Some(JoinSide::Left),
1912
                &[],
1913
            );
1914
            let orderings = &join_eq.oeq_class.orderings;
1915
            let err_msg = format!("expected: {:?}, actual:{:?}", expected, orderings);
1916
            assert_eq!(
1917
                join_eq.oeq_class.orderings.len(),
1918
                expected.len(),
1919
                "{}",
1920
                err_msg
1921
            );
1922
            for ordering in orderings {
1923
                assert!(
1924
                    expected.contains(ordering),
1925
                    "{}, ordering: {:?}",
1926
                    err_msg,
1927
                    ordering
1928
                );
1929
            }
1930
        }
1931
        Ok(())
1932
    }
1933
1934
    #[test]
1935
    fn test_expr_consists_of_constants() -> Result<()> {
1936
        let schema = Arc::new(Schema::new(vec![
1937
            Field::new("a", DataType::Int32, true),
1938
            Field::new("b", DataType::Int32, true),
1939
            Field::new("c", DataType::Int32, true),
1940
            Field::new("d", DataType::Int32, true),
1941
            Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true),
1942
        ]));
1943
        let col_a = col("a", &schema)?;
1944
        let col_b = col("b", &schema)?;
1945
        let col_d = col("d", &schema)?;
1946
        let b_plus_d = Arc::new(BinaryExpr::new(
1947
            Arc::clone(&col_b),
1948
            Operator::Plus,
1949
            Arc::clone(&col_d),
1950
        )) as Arc<dyn PhysicalExpr>;
1951
1952
        let constants = vec![Arc::clone(&col_a), Arc::clone(&col_b)];
1953
        let expr = Arc::clone(&b_plus_d);
1954
        assert!(!is_constant_recurse(&constants, &expr));
1955
1956
        let constants = vec![Arc::clone(&col_a), Arc::clone(&col_b), Arc::clone(&col_d)];
1957
        let expr = Arc::clone(&b_plus_d);
1958
        assert!(is_constant_recurse(&constants, &expr));
1959
        Ok(())
1960
    }
1961
1962
    #[test]
1963
    fn test_get_updated_right_ordering_equivalence_properties() -> Result<()> {
1964
        let join_type = JoinType::Inner;
1965
        // Join right child schema
1966
        let child_fields: Fields = ["x", "y", "z", "w"]
1967
            .into_iter()
1968
            .map(|name| Field::new(name, DataType::Int32, true))
1969
            .collect();
1970
        let child_schema = Schema::new(child_fields);
1971
        let col_x = &col("x", &child_schema)?;
1972
        let col_y = &col("y", &child_schema)?;
1973
        let col_z = &col("z", &child_schema)?;
1974
        let col_w = &col("w", &child_schema)?;
1975
        let option_asc = SortOptions {
1976
            descending: false,
1977
            nulls_first: false,
1978
        };
1979
        // [x ASC, y ASC], [z ASC, w ASC]
1980
        let orderings = vec![
1981
            vec![(col_x, option_asc), (col_y, option_asc)],
1982
            vec![(col_z, option_asc), (col_w, option_asc)],
1983
        ];
1984
        let orderings = convert_to_orderings(&orderings);
1985
        // Right child ordering equivalences
1986
        let mut right_oeq_class = OrderingEquivalenceClass::new(orderings);
1987
1988
        let left_columns_len = 4;
1989
1990
        let fields: Fields = ["a", "b", "c", "d", "x", "y", "z", "w"]
1991
            .into_iter()
1992
            .map(|name| Field::new(name, DataType::Int32, true))
1993
            .collect();
1994
1995
        // Join Schema
1996
        let schema = Schema::new(fields);
1997
        let col_a = &col("a", &schema)?;
1998
        let col_d = &col("d", &schema)?;
1999
        let col_x = &col("x", &schema)?;
2000
        let col_y = &col("y", &schema)?;
2001
        let col_z = &col("z", &schema)?;
2002
        let col_w = &col("w", &schema)?;
2003
2004
        let mut join_eq_properties = EquivalenceProperties::new(Arc::new(schema));
2005
        // a=x and d=w
2006
        join_eq_properties.add_equal_conditions(col_a, col_x)?;
2007
        join_eq_properties.add_equal_conditions(col_d, col_w)?;
2008
2009
        updated_right_ordering_equivalence_class(
2010
            &mut right_oeq_class,
2011
            &join_type,
2012
            left_columns_len,
2013
        );
2014
        join_eq_properties.add_ordering_equivalence_class(right_oeq_class);
2015
        let result = join_eq_properties.oeq_class().clone();
2016
2017
        // [x ASC, y ASC], [z ASC, w ASC]
2018
        let orderings = vec![
2019
            vec![(col_x, option_asc), (col_y, option_asc)],
2020
            vec![(col_z, option_asc), (col_w, option_asc)],
2021
        ];
2022
        let orderings = convert_to_orderings(&orderings);
2023
        let expected = OrderingEquivalenceClass::new(orderings);
2024
2025
        assert_eq!(result, expected);
2026
2027
        Ok(())
2028
    }
2029
2030
    /// Asserts that registering `a = c` before adding the orderings `[b]` and
    /// `[c]` produces the same ordering equivalence class as adding those two
    /// orderings to properties that have no equality registered.
    #[test]
2031
    fn test_normalize_ordering_equivalence_classes() -> Result<()> {
2032
        let sort_options = SortOptions::default();
2033
2034
        let schema = Schema::new(vec![
2035
            Field::new("a", DataType::Int32, true),
2036
            Field::new("b", DataType::Int32, true),
2037
            Field::new("c", DataType::Int32, true),
2038
        ]);
2039
        let col_a_expr = col("a", &schema)?;
2040
        let col_b_expr = col("b", &schema)?;
2041
        let col_c_expr = col("c", &schema)?;
2042
        let mut eq_properties = EquivalenceProperties::new(Arc::new(schema.clone()));
2043
2044
        // a = c
        eq_properties.add_equal_conditions(&col_a_expr, &col_c_expr)?;
2045
        // Two single-column orderings, [b] and [c], both with default sort options.
        let others = vec![
2046
            vec![PhysicalSortExpr {
2047
                expr: Arc::clone(&col_b_expr),
2048
                options: sort_options,
2049
            }],
2050
            vec![PhysicalSortExpr {
2051
                expr: Arc::clone(&col_c_expr),
2052
                options: sort_options,
2053
            }],
2054
        ];
2055
        eq_properties.add_new_orderings(others);
2056
2057
        // Expected side: the same two orderings, but without the `a = c` equality.
        let mut expected_eqs = EquivalenceProperties::new(Arc::new(schema));
2058
        expected_eqs.add_new_orderings([
2059
            vec![PhysicalSortExpr {
2060
                expr: Arc::clone(&col_b_expr),
2061
                options: sort_options,
2062
            }],
2063
            vec![PhysicalSortExpr {
2064
                expr: Arc::clone(&col_c_expr),
2065
                options: sort_options,
2066
            }],
2067
        ]);
2068
2069
        let oeq_class = eq_properties.oeq_class().clone();
2070
        let expected = expected_eqs.oeq_class();
2071
        assert!(oeq_class.eq(expected));
2072
2073
        Ok(())
2074
    }
2075
2076
    /// Exercises `find_longest_permutation` with the required columns `[b, a]`
    /// against three different sets of known orderings, checking the returned
    /// indices (and, for the first two scenarios, the returned sort expressions).
    #[test]
2077
    fn test_get_indices_of_matching_sort_exprs_with_order_eq() -> Result<()> {
2078
        let sort_options = SortOptions::default();
2079
        // Negation of the default options, used for column `b` below.
        let sort_options_not = SortOptions::default().not();
2080
2081
        // Scenario 1: the only known ordering is [b (negated options), a (default options)].
        let schema = Schema::new(vec![
2082
            Field::new("a", DataType::Int32, true),
2083
            Field::new("b", DataType::Int32, true),
2084
        ]);
2085
        let col_a = &col("a", &schema)?;
2086
        let col_b = &col("b", &schema)?;
2087
        let required_columns = [Arc::clone(col_b), Arc::clone(col_a)];
2088
        let mut eq_properties = EquivalenceProperties::new(Arc::new(schema));
2089
        eq_properties.add_new_orderings([vec![
2090
            PhysicalSortExpr {
2091
                expr: Arc::new(Column::new("b", 1)),
2092
                options: sort_options_not,
2093
            },
2094
            PhysicalSortExpr {
2095
                expr: Arc::new(Column::new("a", 0)),
2096
                options: sort_options,
2097
            },
2098
        ]]);
2099
        let (result, idxs) = eq_properties.find_longest_permutation(&required_columns);
2100
        // Both required columns are matched, in the order they were requested.
        assert_eq!(idxs, vec![0, 1]);
2101
        assert_eq!(
2102
            result,
2103
            vec![
2104
                PhysicalSortExpr {
2105
                    expr: Arc::clone(col_b),
2106
                    options: sort_options_not
2107
                },
2108
                PhysicalSortExpr {
2109
                    expr: Arc::clone(col_a),
2110
                    options: sort_options
2111
                }
2112
            ]
2113
        );
2114
2115
        // Scenario 2: an additional, unrelated ordering [c] is known alongside
        // [b, a]; the expected result is the same as in scenario 1.
        let schema = Schema::new(vec![
2116
            Field::new("a", DataType::Int32, true),
2117
            Field::new("b", DataType::Int32, true),
2118
            Field::new("c", DataType::Int32, true),
2119
        ]);
2120
        let col_a = &col("a", &schema)?;
2121
        let col_b = &col("b", &schema)?;
2122
        let required_columns = [Arc::clone(col_b), Arc::clone(col_a)];
2123
        let mut eq_properties = EquivalenceProperties::new(Arc::new(schema));
2124
        eq_properties.add_new_orderings([
2125
            vec![PhysicalSortExpr {
2126
                expr: Arc::new(Column::new("c", 2)),
2127
                options: sort_options,
2128
            }],
2129
            vec![
2130
                PhysicalSortExpr {
2131
                    expr: Arc::new(Column::new("b", 1)),
2132
                    options: sort_options_not,
2133
                },
2134
                PhysicalSortExpr {
2135
                    expr: Arc::new(Column::new("a", 0)),
2136
                    options: sort_options,
2137
                },
2138
            ],
2139
        ]);
2140
        let (result, idxs) = eq_properties.find_longest_permutation(&required_columns);
2141
        assert_eq!(idxs, vec![0, 1]);
2142
        assert_eq!(
2143
            result,
2144
            vec![
2145
                PhysicalSortExpr {
2146
                    expr: Arc::clone(col_b),
2147
                    options: sort_options_not
2148
                },
2149
                PhysicalSortExpr {
2150
                    expr: Arc::clone(col_a),
2151
                    options: sort_options
2152
                }
2153
            ]
2154
        );
2155
2156
        // Scenario 3: the known ordering is [b, c, a], i.e. `c` sits between the
        // two required columns.
        let required_columns = [
2157
            Arc::new(Column::new("b", 1)) as _,
2158
            Arc::new(Column::new("a", 0)) as _,
2159
        ];
2160
        let schema = Schema::new(vec![
2161
            Field::new("a", DataType::Int32, true),
2162
            Field::new("b", DataType::Int32, true),
2163
            Field::new("c", DataType::Int32, true),
2164
        ]);
2165
        let mut eq_properties = EquivalenceProperties::new(Arc::new(schema));
2166
2167
        // not satisfied orders
2168
        eq_properties.add_new_orderings([vec![
2169
            PhysicalSortExpr {
2170
                expr: Arc::new(Column::new("b", 1)),
2171
                options: sort_options_not,
2172
            },
2173
            PhysicalSortExpr {
2174
                expr: Arc::new(Column::new("c", 2)),
2175
                options: sort_options,
2176
            },
2177
            PhysicalSortExpr {
2178
                expr: Arc::new(Column::new("a", 0)),
2179
                options: sort_options,
2180
            },
2181
        ]]);
2182
        let (_, idxs) = eq_properties.find_longest_permutation(&required_columns);
2183
        // Only the first required column (`b`, index 0) is expected to match.
        assert_eq!(idxs, vec![0]);
2184
2185
        Ok(())
2186
    }
2187
2188
    /// Checks the `sort_properties` reported by `get_expr_properties` for several
    /// expressions, given the equality `b = a` and the orderings `[b ASC]` and
    /// `[d ASC]`.
    #[test]
2189
    fn test_update_properties() -> Result<()> {
2190
        let schema = Schema::new(vec![
2191
            Field::new("a", DataType::Int32, true),
2192
            Field::new("b", DataType::Int32, true),
2193
            Field::new("c", DataType::Int32, true),
2194
            Field::new("d", DataType::Int32, true),
2195
        ]);
2196
2197
        let mut eq_properties = EquivalenceProperties::new(Arc::new(schema.clone()));
2198
        let col_a = &col("a", &schema)?;
2199
        let col_b = &col("b", &schema)?;
2200
        let col_c = &col("c", &schema)?;
2201
        let col_d = &col("d", &schema)?;
2202
        let option_asc = SortOptions {
2203
            descending: false,
2204
            nulls_first: false,
2205
        };
2206
        // b=a (e.g they are aliases)
2207
        eq_properties.add_equal_conditions(col_b, col_a)?;
2208
        // [b ASC], [d ASC]
2209
        eq_properties.add_new_orderings(vec![
2210
            vec![PhysicalSortExpr {
2211
                expr: Arc::clone(col_b),
2212
                options: option_asc,
2213
            }],
2214
            vec![PhysicalSortExpr {
2215
                expr: Arc::clone(col_d),
2216
                options: option_asc,
2217
            }],
2218
        ]);
2219
2220
        // Each entry pairs an expression with the sort properties expected for it.
        let test_cases = vec![
2221
            // d + b
2222
            (
2223
                Arc::new(BinaryExpr::new(
2224
                    Arc::clone(col_d),
2225
                    Operator::Plus,
2226
                    Arc::clone(col_b),
2227
                )) as Arc<dyn PhysicalExpr>,
2228
                SortProperties::Ordered(option_asc),
2229
            ),
2230
            // b
2231
            (Arc::clone(col_b), SortProperties::Ordered(option_asc)),
2232
            // a
2233
            (Arc::clone(col_a), SortProperties::Ordered(option_asc)),
2234
            // a + c
2235
            (
2236
                Arc::new(BinaryExpr::new(
2237
                    Arc::clone(col_a),
2238
                    Operator::Plus,
2239
                    Arc::clone(col_c),
2240
                )),
2241
                SortProperties::Unordered,
2242
            ),
2243
        ];
2244
        for (expr, expected) in test_cases {
2245
            // The first sort expression of every known ordering; collected only
            // so it can be shown in the failure message.
            let leading_orderings = eq_properties
2246
                .oeq_class()
2247
                .iter()
2248
                .flat_map(|ordering| ordering.first().cloned())
2249
                .collect::<Vec<_>>();
2250
            let expr_props = eq_properties.get_expr_properties(Arc::clone(&expr));
2251
            let err_msg = format!(
2252
                "expr:{:?}, expected: {:?}, actual: {:?}, leading_orderings: {leading_orderings:?}",
2253
                expr, expected, expr_props.sort_properties
2254
            );
2255
            assert_eq!(expr_props.sort_properties, expected, "{}", err_msg);
2256
        }
2257
2258
        Ok(())
2259
    }
2260
2261
    /// Randomized check of `find_longest_permutation`.
    ///
    /// For each of `N_RANDOM_SCHEMA` seeds, builds random equivalence properties
    /// and a table generated from them, then for every combination of the
    /// candidate expressions verifies that the returned ordering and indices
    /// agree with each other and that `is_table_same_after_sort` holds for the
    /// returned ordering.
    #[test]
2262
    fn test_find_longest_permutation_random() -> Result<()> {
2263
        const N_RANDOM_SCHEMA: usize = 100;
2264
        const N_ELEMENTS: usize = 125;
2265
        const N_DISTINCT: usize = 5;
2266
2267
        for seed in 0..N_RANDOM_SCHEMA {
2268
            // Create a random schema with random properties
2269
            let (test_schema, eq_properties) = create_random_schema(seed as u64)?;
2270
            // Generate a data that satisfies properties given
2271
            let table_data_with_properties =
2272
                generate_table_for_eq_properties(&eq_properties, N_ELEMENTS, N_DISTINCT)?;
2273
2274
            // Two non-column expressions are added to the candidate set: a UDF
            // applied to `a`, and the binary expression `a + b`.
            let test_fun = ScalarUDF::new_from_impl(TestScalarUDF::new());
2275
            let floor_a = crate::udf::create_physical_expr(
2276
                &test_fun,
2277
                &[col("a", &test_schema)?],
2278
                &test_schema,
2279
                &[],
2280
                &DFSchema::empty(),
2281
            )?;
2282
            let a_plus_b = Arc::new(BinaryExpr::new(
2283
                col("a", &test_schema)?,
2284
                Operator::Plus,
2285
                col("b", &test_schema)?,
2286
            )) as Arc<dyn PhysicalExpr>;
2287
            let exprs = [
2288
                col("a", &test_schema)?,
2289
                col("b", &test_schema)?,
2290
                col("c", &test_schema)?,
2291
                col("d", &test_schema)?,
2292
                col("e", &test_schema)?,
2293
                col("f", &test_schema)?,
2294
                floor_a,
2295
                a_plus_b,
2296
            ];
2297
2298
            // Try every combination size from 0 up to the full expression list.
            for n_req in 0..=exprs.len() {
2299
                for exprs in exprs.iter().combinations(n_req) {
2300
                    let exprs = exprs.into_iter().cloned().collect::<Vec<_>>();
2301
                    let (ordering, indices) =
2302
                        eq_properties.find_longest_permutation(&exprs);
2303
                    // Make sure that find_longest_permutation return values are consistent
2304
                    // by rebuilding the ordering from `indices` and the returned options.
                    let ordering2 = indices
2305
                        .iter()
2306
                        .zip(ordering.iter())
2307
                        .map(|(&idx, sort_expr)| PhysicalSortExpr {
2308
                            expr: Arc::clone(&exprs[idx]),
2309
                            options: sort_expr.options,
2310
                        })
2311
                        .collect::<Vec<_>>();
2312
                    assert_eq!(
2313
                        ordering, ordering2,
2314
                        "indices and lexicographical ordering do not match"
2315
                    );
2316
2317
                    let err_msg = format!(
2318
                        "Error in test case ordering:{:?}, eq_properties.oeq_class: {:?}, eq_properties.eq_group: {:?}, eq_properties.constants: {:?}",
2319
                        ordering, eq_properties.oeq_class, eq_properties.eq_group, eq_properties.constants
2320
                    );
2321
                    assert_eq!(ordering.len(), indices.len(), "{}", err_msg);
2322
                    // Since ordered section satisfies schema, we expect
2323
                    // that result will be same after sort (e.g sort was unnecessary).
2324
                    assert!(
2325
                        is_table_same_after_sort(
2326
                            ordering.clone(),
2327
                            table_data_with_properties.clone(),
2328
                        )?,
2329
                        "{}",
2330
                        err_msg
2331
                    );
2332
                }
2333
            }
2334
        }
2335
2336
        Ok(())
2337
    }
2338
    /// Table-driven check of `find_longest_permutation`: each case pairs a list
    /// of input expressions with the sort expressions expected back. Only the
    /// returned ordering is compared; the returned indices are ignored.
    #[test]
2339
    fn test_find_longest_permutation() -> Result<()> {
2340
        // Schema satisfies following orderings:
2341
        // [a ASC], [d ASC, b ASC], [e DESC, f ASC, g ASC]
2342
        // and
2343
        // Column [a=c] (e.g they are aliases).
2344
        // Below we also add [d ASC, h DESC], for test purposes
2345
        let (test_schema, mut eq_properties) = create_test_params()?;
2346
        let col_a = &col("a", &test_schema)?;
2347
        let col_b = &col("b", &test_schema)?;
2348
        let col_c = &col("c", &test_schema)?;
2349
        let col_d = &col("d", &test_schema)?;
2350
        let col_e = &col("e", &test_schema)?;
2351
        let col_f = &col("f", &test_schema)?;
2352
        let col_h = &col("h", &test_schema)?;
2353
        // a + d
2354
        let a_plus_d = Arc::new(BinaryExpr::new(
2355
            Arc::clone(col_a),
2356
            Operator::Plus,
2357
            Arc::clone(col_d),
2358
        )) as Arc<dyn PhysicalExpr>;
2359
2360
        let option_asc = SortOptions {
2361
            descending: false,
2362
            nulls_first: false,
2363
        };
2364
        let option_desc = SortOptions {
2365
            descending: true,
2366
            nulls_first: true,
2367
        };
2368
        // [d ASC, h DESC] also satisfies schema.
2369
        eq_properties.add_new_orderings([vec![
2370
            PhysicalSortExpr {
2371
                expr: Arc::clone(col_d),
2372
                options: option_asc,
2373
            },
2374
            PhysicalSortExpr {
2375
                expr: Arc::clone(col_h),
2376
                options: option_desc,
2377
            },
2378
        ]]);
2379
        // (input expressions, expected ordering returned for them)
        let test_cases = vec![
2380
            // TEST CASE 1
2381
            (vec![col_a], vec![(col_a, option_asc)]),
2382
            // TEST CASE 2
2383
            (vec![col_c], vec![(col_c, option_asc)]),
2384
            // TEST CASE 3
2385
            (
2386
                vec![col_d, col_e, col_b],
2387
                vec![
2388
                    (col_d, option_asc),
2389
                    (col_e, option_desc),
2390
                    (col_b, option_asc),
2391
                ],
2392
            ),
2393
            // TEST CASE 4
2394
            (vec![col_b], vec![]),
2395
            // TEST CASE 5
2396
            (vec![col_d], vec![(col_d, option_asc)]),
2397
            // TEST CASE 6
2398
            (vec![&a_plus_d], vec![(&a_plus_d, option_asc)]),
2399
            // TEST CASE 7
2400
            (
2401
                vec![col_b, col_d],
2402
                vec![(col_d, option_asc), (col_b, option_asc)],
2403
            ),
2404
            // TEST CASE 8
2405
            (
2406
                vec![col_c, col_e],
2407
                vec![(col_c, option_asc), (col_e, option_desc)],
2408
            ),
2409
            // TEST CASE 9
2410
            (
2411
                vec![col_d, col_h, col_e, col_f, col_b],
2412
                vec![
2413
                    (col_d, option_asc),
2414
                    (col_e, option_desc),
2415
                    (col_h, option_desc),
2416
                    (col_f, option_asc),
2417
                    (col_b, option_asc),
2418
                ],
2419
            ),
2420
            // TEST CASE 10
2421
            (
2422
                vec![col_e, col_d, col_h, col_f, col_b],
2423
                vec![
2424
                    (col_e, option_desc),
2425
                    (col_d, option_asc),
2426
                    (col_h, option_desc),
2427
                    (col_f, option_asc),
2428
                    (col_b, option_asc),
2429
                ],
2430
            ),
2431
            // TEST CASE 11
2432
            (
2433
                vec![col_e, col_d, col_b, col_h, col_f],
2434
                vec![
2435
                    (col_e, option_desc),
2436
                    (col_d, option_asc),
2437
                    (col_b, option_asc),
2438
                    (col_h, option_desc),
2439
                    (col_f, option_asc),
2440
                ],
2441
            ),
2442
        ];
2443
        for (exprs, expected) in test_cases {
2444
            let exprs = exprs.into_iter().cloned().collect::<Vec<_>>();
2445
            let expected = convert_to_sort_exprs(&expected);
2446
            let (actual, _) = eq_properties.find_longest_permutation(&exprs);
2447
            assert_eq!(actual, expected);
2448
        }
2449
2450
        Ok(())
2451
    }
2452
2453
    /// Checks `find_longest_permutation` for a column that has been declared
    /// constant: requesting `[h]` is expected to return `h` with
    /// `SortOptions::default()`.
    #[test]
2454
    fn test_find_longest_permutation2() -> Result<()> {
2455
        // Schema satisfies following orderings:
2456
        // [a ASC], [d ASC, b ASC], [e DESC, f ASC, g ASC]
2457
        // and
2458
        // Column [a=c] (e.g they are aliases).
2459
        // Below, column h is additionally marked as constant, for test purposes
2460
        let (test_schema, mut eq_properties) = create_test_params()?;
2461
        let col_h = &col("h", &test_schema)?;
2462
2463
        // Add column h as constant
2464
        eq_properties = eq_properties.with_constants(vec![ConstExpr::from(col_h)]);
2465
2466
        let test_cases = vec![
2467
            // TEST CASE 1
2468
            // ordering of the constants are treated as default ordering.
2469
            // This is the convention currently used.
2470
            (vec![col_h], vec![(col_h, SortOptions::default())]),
2471
        ];
2472
        for (exprs, expected) in test_cases {
2473
            let exprs = exprs.into_iter().cloned().collect::<Vec<_>>();
2474
            let expected = convert_to_sort_exprs(&expected);
2475
            let (actual, _) = eq_properties.find_longest_permutation(&exprs);
2476
            assert_eq!(actual, expected);
2477
        }
2478
2479
        Ok(())
2480
    }
2481
2482
    /// Table-driven check of `get_finer_requirement` on properties that carry no
    /// equalities or orderings: each case gives two sort requirements and the
    /// result expected from combining them (`None` when they conflict).
    #[test]
2483
    fn test_get_finer() -> Result<()> {
2484
        let schema = create_test_schema()?;
2485
        let col_a = &col("a", &schema)?;
2486
        let col_b = &col("b", &schema)?;
2487
        let col_c = &col("c", &schema)?;
2488
        let eq_properties = EquivalenceProperties::new(schema);
2489
        let option_asc = SortOptions {
2490
            descending: false,
2491
            nulls_first: false,
2492
        };
2493
        let option_desc = SortOptions {
2494
            descending: true,
2495
            nulls_first: true,
2496
        };
2497
        // First entry, and second entry are the physical sort requirement that are argument for get_finer_requirement.
2498
        // Third entry is the expected result.
2499
        let tests_cases = vec![
2500
            // Get finer requirement between [a Some(ASC)] and [a None, b Some(ASC)]
2501
            // result should be [a Some(ASC), b Some(ASC)]
2502
            (
2503
                vec![(col_a, Some(option_asc))],
2504
                vec![(col_a, None), (col_b, Some(option_asc))],
2505
                Some(vec![(col_a, Some(option_asc)), (col_b, Some(option_asc))]),
2506
            ),
2507
            // Get finer requirement between [a Some(ASC), b Some(ASC), c Some(ASC)] and [a Some(ASC), b Some(ASC)]
2508
            // result should be [a Some(ASC), b Some(ASC), c Some(ASC)]
2509
            (
2510
                vec![
2511
                    (col_a, Some(option_asc)),
2512
                    (col_b, Some(option_asc)),
2513
                    (col_c, Some(option_asc)),
2514
                ],
2515
                vec![(col_a, Some(option_asc)), (col_b, Some(option_asc))],
2516
                Some(vec![
2517
                    (col_a, Some(option_asc)),
2518
                    (col_b, Some(option_asc)),
2519
                    (col_c, Some(option_asc)),
2520
                ]),
2521
            ),
2522
            // Get finer requirement between [a Some(ASC), b Some(ASC)] and [a Some(ASC), b Some(DESC)]
2523
            // result should be None
2524
            (
2525
                vec![(col_a, Some(option_asc)), (col_b, Some(option_asc))],
2526
                vec![(col_a, Some(option_asc)), (col_b, Some(option_desc))],
2527
                None,
2528
            ),
2529
        ];
2530
        for (lhs, rhs, expected) in tests_cases {
2531
            let lhs = convert_to_sort_reqs(&lhs);
2532
            let rhs = convert_to_sort_reqs(&rhs);
2533
            let expected = expected.map(|expected| convert_to_sort_reqs(&expected));
2534
            let finer = eq_properties.get_finer_requirement(&lhs, &rhs);
2535
            assert_eq!(finer, expected)
2536
        }
2537
2538
        Ok(())
2539
    }
2540
2541
    /// Table-driven check of `normalize_sort_requirements`: each case pairs an
    /// input requirement with the requirement expected after normalization.
    #[test]
2542
    fn test_normalize_sort_reqs() -> Result<()> {
2543
        // Schema satisfies following properties
2544
        // a=c
2545
        // and following orderings are valid
2546
        // [a ASC], [d ASC, b ASC], [e DESC, f ASC, g ASC]
2547
        let (test_schema, eq_properties) = create_test_params()?;
2548
        let col_a = &col("a", &test_schema)?;
2549
        let col_b = &col("b", &test_schema)?;
2550
        let col_c = &col("c", &test_schema)?;
2551
        let col_d = &col("d", &test_schema)?;
2552
        let col_e = &col("e", &test_schema)?;
2553
        let col_f = &col("f", &test_schema)?;
2554
        let option_asc = SortOptions {
2555
            descending: false,
2556
            nulls_first: false,
2557
        };
2558
        let option_desc = SortOptions {
2559
            descending: true,
2560
            nulls_first: true,
2561
        };
2562
        // First element in the tuple is the input requirement, second element is the
        // expected result of normalize_sort_requirements for that input
2563
        let requirements = vec![
2564
            (
2565
                vec![(col_a, Some(option_asc))],
2566
                vec![(col_a, Some(option_asc))],
2567
            ),
2568
            (
2569
                vec![(col_a, Some(option_desc))],
2570
                vec![(col_a, Some(option_desc))],
2571
            ),
2572
            (vec![(col_a, None)], vec![(col_a, None)]),
2573
            // Test whether equivalence works as expected
2574
            (
2575
                vec![(col_c, Some(option_asc))],
2576
                vec![(col_a, Some(option_asc))],
2577
            ),
2578
            (vec![(col_c, None)], vec![(col_a, None)]),
2579
            // Test whether ordering equivalence works as expected
2580
            (
2581
                vec![(col_d, Some(option_asc)), (col_b, Some(option_asc))],
2582
                vec![(col_d, Some(option_asc)), (col_b, Some(option_asc))],
2583
            ),
2584
            (
2585
                vec![(col_d, None), (col_b, None)],
2586
                vec![(col_d, None), (col_b, None)],
2587
            ),
2588
            (
2589
                vec![(col_e, Some(option_desc)), (col_f, Some(option_asc))],
2590
                vec![(col_e, Some(option_desc)), (col_f, Some(option_asc))],
2591
            ),
2592
            // We should also be able to normalize compatible (not exactly equal) requirements
2593
            (
2594
                vec![(col_e, Some(option_desc)), (col_f, None)],
2595
                vec![(col_e, Some(option_desc)), (col_f, None)],
2596
            ),
2597
            (
2598
                vec![(col_e, None), (col_f, None)],
2599
                vec![(col_e, None), (col_f, None)],
2600
            ),
2601
        ];
2602
2603
        for (reqs, expected_normalized) in requirements.into_iter() {
2604
            let req = convert_to_sort_reqs(&reqs);
2605
            let expected_normalized = convert_to_sort_reqs(&expected_normalized);
2606
2607
            assert_eq!(
2608
                eq_properties.normalize_sort_requirements(&req),
2609
                expected_normalized
2610
            );
2611
        }
2612
2613
        Ok(())
2614
    }
2615
2616
    /// Checks that `normalize_sort_requirements` rewrites requirements on `c`
    /// into requirements on `a`, and leaves requirements on `a` and `d` as they
    /// are, for the properties returned by `create_test_params`.
    #[test]
2617
    fn test_schema_normalize_sort_requirement_with_equivalence() -> Result<()> {
2618
        let option1 = SortOptions {
2619
            descending: false,
2620
            nulls_first: false,
2621
        };
2622
        // Assume that column a and c are aliases.
2623
        let (test_schema, eq_properties) = create_test_params()?;
2624
        let col_a = &col("a", &test_schema)?;
2625
        let col_c = &col("c", &test_schema)?;
2626
        let col_d = &col("d", &test_schema)?;
2627
2628
        // Test cases for equivalence normalization
2629
        // First entry in the tuple is PhysicalSortRequirement, second entry in the tuple is
2630
        // expected PhysicalSortRequirement after normalization.
2631
        let test_cases = vec![
2632
            (vec![(col_a, Some(option1))], vec![(col_a, Some(option1))]),
2633
            // In the normalized version column c should be replaced with column a
2634
            (vec![(col_c, Some(option1))], vec![(col_a, Some(option1))]),
2635
            (vec![(col_c, None)], vec![(col_a, None)]),
2636
            (vec![(col_d, Some(option1))], vec![(col_d, Some(option1))]),
2637
        ];
2638
        for (reqs, expected) in test_cases.into_iter() {
2639
            let reqs = convert_to_sort_reqs(&reqs);
2640
            let expected = convert_to_sort_reqs(&expected);
2641
2642
            let normalized = eq_properties.normalize_sort_requirements(&reqs);
2643
            assert!(
2644
                expected.eq(&normalized),
2645
                "error in test: reqs: {reqs:?}, expected: {expected:?}, normalized: {normalized:?}"
2646
            );
2647
        }
2648
2649
        Ok(())
2650
    }
2651
2652
    /// Checks `ordering_satisfy` for a sort on `c` when the data is ordered by
    /// `(a, b, c)` and `CAST(c AS Date32)` is declared equal to `a`.
    ///
    /// Each case is run against two sets of properties, built by applying the
    /// equalities and the constants in either order.
    #[test]
2653
    fn test_eliminate_redundant_monotonic_sorts() -> Result<()> {
2654
        let schema = Arc::new(Schema::new(vec![
2655
            Field::new("a", DataType::Date32, true),
2656
            Field::new("b", DataType::Utf8, true),
2657
            Field::new("c", DataType::Timestamp(TimeUnit::Nanosecond, None), true),
2658
        ]));
2659
        // Base ordering: (a, b, c), each ascending with nulls first.
        let base_properties = EquivalenceProperties::new(Arc::clone(&schema))
2660
            .with_reorder(
2661
                ["a", "b", "c"]
2662
                    .into_iter()
2663
                    .map(|c| {
2664
                        col(c, schema.as_ref()).map(|expr| PhysicalSortExpr {
2665
                            expr,
2666
                            options: SortOptions {
2667
                                descending: false,
2668
                                nulls_first: true,
2669
                            },
2670
                        })
2671
                    })
2672
                    .collect::<Result<Vec<_>>>()?,
2673
            );
2674
2675
        // One scenario: the constants and equalities to register, the columns
        // to sort by, and whether that sort is expected to be satisfied.
        struct TestCase {
2676
            name: &'static str,
2677
            constants: Vec<Arc<dyn PhysicalExpr>>,
2678
            equal_conditions: Vec<[Arc<dyn PhysicalExpr>; 2]>,
2679
            sort_columns: &'static [&'static str],
2680
            should_satisfy_ordering: bool,
2681
        }
2682
2683
        let col_a = col("a", schema.as_ref())?;
2684
        let col_b = col("b", schema.as_ref())?;
2685
        let col_c = col("c", schema.as_ref())?;
2686
        // CAST(c AS Date32), i.e. `c` cast to the data type of column `a`.
        let cast_c = Arc::new(CastExpr::new(col_c, DataType::Date32, None));
2687
2688
        let cases = vec![
2689
            TestCase {
2690
                name: "(a, b, c) -> (c)",
2691
                // b is constant, so it should be removed from the sort order
2692
                constants: vec![Arc::clone(&col_b)],
2693
                equal_conditions: vec![[
2694
                    Arc::clone(&cast_c) as Arc<dyn PhysicalExpr>,
2695
                    Arc::clone(&col_a),
2696
                ]],
2697
                sort_columns: &["c"],
2698
                should_satisfy_ordering: true,
2699
            },
2700
            // Same test with above test, where equality order is swapped.
2701
            // Algorithm shouldn't depend on this order.
2702
            // NOTE(review): this case shares its `name` with the previous one,
            // so the failure message alone does not tell the two apart.
            TestCase {
2703
                name: "(a, b, c) -> (c)",
2704
                // b is constant, so it should be removed from the sort order
2705
                constants: vec![col_b],
2706
                equal_conditions: vec![[
2707
                    Arc::clone(&col_a),
2708
                    Arc::clone(&cast_c) as Arc<dyn PhysicalExpr>,
2709
                ]],
2710
                sort_columns: &["c"],
2711
                should_satisfy_ordering: true,
2712
            },
2713
            TestCase {
2714
                name: "not ordered because (b) is not constant",
2715
                // b is not constant anymore
2716
                constants: vec![],
2717
                // a and c are still compatible, but this is irrelevant since the original ordering is (a, b, c)
2718
                equal_conditions: vec![[
2719
                    Arc::clone(&cast_c) as Arc<dyn PhysicalExpr>,
2720
                    Arc::clone(&col_a),
2721
                ]],
2722
                sort_columns: &["c"],
2723
                should_satisfy_ordering: false,
2724
            },
2725
        ];
2726
2727
        for case in cases {
2728
            // Construct the equivalence properties in different orders
2729
            // to exercise different code paths
2730
            // (The resulting properties _should_ be the same)
2731
            for properties in [
2732
                // Equal conditions before constants
2733
                {
2734
                    let mut properties = base_properties.clone();
2735
                    for [left, right] in &case.equal_conditions {
2736
                        properties.add_equal_conditions(left, right)?
2737
                    }
2738
                    properties.with_constants(
2739
                        case.constants.iter().cloned().map(ConstExpr::from),
2740
                    )
2741
                },
2742
                // Constants before equal conditions
2743
                {
2744
                    let mut properties = base_properties.clone().with_constants(
2745
                        case.constants.iter().cloned().map(ConstExpr::from),
2746
                    );
2747
                    for [left, right] in &case.equal_conditions {
2748
                        properties.add_equal_conditions(left, right)?
2749
                    }
2750
                    properties
2751
                },
2752
            ] {
2753
                // Build the requested sort from the case's column names, using
                // default sort options for each column.
                let sort = case
2754
                    .sort_columns
2755
                    .iter()
2756
                    .map(|&name| {
2757
                        col(name, &schema).map(|col| PhysicalSortExpr {
2758
                            expr: col,
2759
                            options: SortOptions::default(),
2760
                        })
2761
                    })
2762
                    .collect::<Result<Vec<_>>>()?;
2763
2764
                assert_eq!(
2765
                    properties.ordering_satisfy(&sort),
2766
                    case.should_satisfy_ordering,
2767
                    "failed test '{}'",
2768
                    case.name
2769
                );
2770
            }
2771
        }
2772
2773
        Ok(())
2774
    }
2775
2776
    /// Return a new schema with the same types, but new field names
2777
    ///
2778
    /// The new field names are the old field names with `text` appended.
2779
    ///
2780
    /// For example, the schema "a", "b", "c" becomes "a1", "b1", "c1"
2781
    /// if `text` is "1".
2782
    fn append_fields(schema: &SchemaRef, text: &str) -> SchemaRef {
2783
        Arc::new(Schema::new(
2784
            schema
2785
                .fields()
2786
                .iter()
2787
                .map(|field| {
2788
                    Field::new(
2789
                        // Annotate name with `text`:
2790
                        format!("{}{}", field.name(), text),
2791
                        field.data_type().clone(),
2792
                        field.is_nullable(),
2793
                    )
2794
                })
2795
                .collect::<Vec<_>>(),
2796
        ))
2797
    }
2798
2799
    /// Three union inputs ordered by [a, b, c], [a1, b1, c1] and [a2, b2]
    /// (each expressed over its own schema); the expected union ordering is
    /// [a, b].
    #[test]
    fn test_union_equivalence_properties_multi_children_1() {
        let schema = create_test_schema().unwrap();
        let child2_schema = append_fields(&schema, "1");
        let child3_schema = append_fields(&schema, "2");

        let test = UnionEquivalenceTest::new(&schema)
            // First input
            .with_child_sort(vec![vec!["a", "b", "c"]], &schema)
            // Second input
            .with_child_sort(vec![vec!["a1", "b1", "c1"]], &child2_schema)
            // Third input
            .with_child_sort(vec![vec!["a2", "b2"]], &child3_schema)
            .with_expected_sort(vec![vec!["a", "b"]]);
        test.run()
    }
2814
2815
    /// Three union inputs ordered by [a, b, c], [a1, b1, c1] and
    /// [a2, b2, c2] (each expressed over its own schema); the expected union
    /// ordering is [a, b, c].
    #[test]
    fn test_union_equivalence_properties_multi_children_2() {
        let schema = create_test_schema().unwrap();
        let child2_schema = append_fields(&schema, "1");
        let child3_schema = append_fields(&schema, "2");

        let test = UnionEquivalenceTest::new(&schema)
            // First input
            .with_child_sort(vec![vec!["a", "b", "c"]], &schema)
            // Second input
            .with_child_sort(vec![vec!["a1", "b1", "c1"]], &child2_schema)
            // Third input
            .with_child_sort(vec![vec!["a2", "b2", "c2"]], &child3_schema)
            .with_expected_sort(vec![vec!["a", "b", "c"]]);
        test.run()
    }
2830
2831
    /// Three union inputs ordered by [a, b], [a1, b1, c1] and [a2, b2, c2]
    /// (each expressed over its own schema); the expected union ordering is
    /// [a, b].
    #[test]
    fn test_union_equivalence_properties_multi_children_3() {
        let schema = create_test_schema().unwrap();
        let child2_schema = append_fields(&schema, "1");
        let child3_schema = append_fields(&schema, "2");

        let test = UnionEquivalenceTest::new(&schema)
            // First input
            .with_child_sort(vec![vec!["a", "b"]], &schema)
            // Second input
            .with_child_sort(vec![vec!["a1", "b1", "c1"]], &child2_schema)
            // Third input
            .with_child_sort(vec![vec!["a2", "b2", "c2"]], &child3_schema)
            .with_expected_sort(vec![vec!["a", "b"]]);
        test.run()
    }
2846
2847
    /// Three union inputs ordered by [a, b], [a1, b1] and [b2, c2] (each
    /// expressed over its own schema); no ordering is expected for the union.
    #[test]
    fn test_union_equivalence_properties_multi_children_4() {
        let schema = create_test_schema().unwrap();
        let child2_schema = append_fields(&schema, "1");
        let child3_schema = append_fields(&schema, "2");

        let test = UnionEquivalenceTest::new(&schema)
            // First input
            .with_child_sort(vec![vec!["a", "b"]], &schema)
            // Second input
            .with_child_sort(vec![vec!["a1", "b1"]], &child2_schema)
            // Third input
            .with_child_sort(vec![vec!["b2", "c2"]], &child3_schema)
            .with_expected_sort(vec![]);
        test.run()
    }
2862
2863
    /// Two union inputs that each carry two orderings: [a, b] and [c] for the
    /// first, [a1, b1] and [c1] for the second (over the renamed schema). Both
    /// orderings, [a, b] and [c], are expected for the union.
    #[test]
    fn test_union_equivalence_properties_multi_children_5() {
        let schema = create_test_schema().unwrap();
        let child2_schema = append_fields(&schema, "1");

        let test = UnionEquivalenceTest::new(&schema)
            // First input
            .with_child_sort(vec![vec!["a", "b"], vec!["c"]], &schema)
            // Second input
            .with_child_sort(vec![vec!["a1", "b1"], vec!["c1"]], &child2_schema)
            .with_expected_sort(vec![vec!["a", "b"], vec!["c"]]);
        test.run()
    }
2875
2876
    /// Two union inputs over the same schema:
    ///
    /// * first input: ordering [a ASC], constants [b, c]
    /// * second input: ordering [b ASC], constants [a, c]
    ///
    /// Expected for the union: orderings [[a ASC], [b ASC]], constants [c].
    #[test]
    fn test_union_equivalence_properties_constants_1() {
        let schema = create_test_schema().unwrap();

        let test = UnionEquivalenceTest::new(&schema)
            .with_child_sort_and_const_exprs(vec![vec!["a"]], vec!["b", "c"], &schema)
            .with_child_sort_and_const_exprs(vec![vec!["b"]], vec!["a", "c"], &schema)
            .with_expected_sort_and_const_exprs(vec![vec!["a"], vec!["b"]], vec!["c"]);
        test.run()
    }
2899
2900
    /// The meet of the orderings [a ASC] and [a ASC, b ASC] should be
    /// [a ASC]. Neither input nor the expected union has any constants.
    #[test]
    fn test_union_equivalence_properties_constants_2() {
        let schema = create_test_schema().unwrap();

        let test = UnionEquivalenceTest::new(&schema)
            // First input: [a ASC], no constants
            .with_child_sort_and_const_exprs(vec![vec!["a"]], vec![], &schema)
            // Second input: [a ASC, b ASC], no constants
            .with_child_sort_and_const_exprs(vec![vec!["a", "b"]], vec![], &schema)
            // Expected union: [a ASC], no constants
            .with_expected_sort_and_const_exprs(vec![vec!["a"]], vec![]);
        test.run()
    }
2924
2925
    /// The meet of the orderings [a ASC] and [a DESC] should be empty: the
    /// expected union has neither an ordering nor a constant.
    #[test]
    fn test_union_equivalence_properties_constants_3() {
        let schema = create_test_schema().unwrap();

        let test = UnionEquivalenceTest::new(&schema)
            // First input: [a ASC], no constants
            .with_child_sort_and_const_exprs(vec![vec!["a"]], vec![], &schema)
            // Second input: [a DESC], no constants
            .with_child_sort_and_const_exprs(vec![vec!["a DESC"]], vec![], &schema)
            // Expected union: nothing at all
            .with_expected_sort_and_const_exprs(vec![], vec![]);
        test.run()
    }
2949
2950
    /// Inputs expressed over differently named schemas: [a ASC] over the
    /// base schema and [a1 ASC, b1 ASC] over the renamed schema.
    ///
    /// The expected union ordering is [a ASC], where `a` and `a1` are at the
    /// same index of their respective schemas.
    #[test]
    fn test_union_equivalence_properties_constants_4() {
        let schema = create_test_schema().unwrap();
        let child2_schema = append_fields(&schema, "1");

        let test = UnionEquivalenceTest::new(&schema)
            // First input: [a ASC], no constants
            .with_child_sort_and_const_exprs(vec![vec!["a"]], vec![], &schema)
            // Second input: [a1 ASC, b1 ASC], no constants
            .with_child_sort_and_const_exprs(
                vec![vec!["a1", "b1"]],
                vec![],
                &child2_schema,
            )
            // Expected union: [a ASC], no constants
            .with_expected_sort_and_const_exprs(vec![vec!["a"]], vec![]);
        test.run()
    }
2978
2979
    /// Two union inputs over the same schema:
    ///
    /// * first input: ordering [a ASC, c ASC], constants [b]
    /// * second input: ordering [b ASC, c ASC], constants [a]
    ///
    /// Expected for the union: orderings [a ASC, b ASC, c ASC] and
    /// [b ASC, a ASC, c ASC], with no constants.
    #[test]
    #[ignore]
    // ignored due to https://github.com/apache/datafusion/issues/12446
    fn test_union_equivalence_properties_constants() {
        let schema = create_test_schema().unwrap();

        let test = UnionEquivalenceTest::new(&schema)
            .with_child_sort_and_const_exprs(vec![vec!["a", "c"]], vec!["b"], &schema)
            .with_child_sort_and_const_exprs(vec![vec!["b", "c"]], vec!["a"], &schema)
            .with_expected_sort_and_const_exprs(
                vec![vec!["a", "b", "c"], vec!["b", "a", "c"]],
                vec![],
            );
        test.run()
    }
3007
3008
    /// Same layout as `test_union_equivalence_properties_constants`, except
    /// that `b` is sorted descending in the second input:
    ///
    /// * first input: ordering [a ASC, c ASC], constants [b]
    /// * second input: ordering [b DESC, c ASC], constants [a]
    ///
    /// Expected for the union: orderings [a ASC, b DESC, c ASC] and
    /// [b DESC, a ASC, c ASC], with no constants.
    #[test]
    #[ignore]
    // ignored due to https://github.com/apache/datafusion/issues/12446
    fn test_union_equivalence_properties_constants_desc() {
        let schema = create_test_schema().unwrap();

        let test = UnionEquivalenceTest::new(&schema)
            .with_child_sort_and_const_exprs(vec![vec!["a", "c"]], vec!["b"], &schema)
            // NB: `b DESC` here
            .with_child_sort_and_const_exprs(
                vec![vec!["b DESC", "c"]],
                vec!["a"],
                &schema,
            )
            .with_expected_sort_and_const_exprs(
                vec![vec!["a", "b DESC", "c"], vec!["b DESC", "a", "c"]],
                vec![],
            );
        test.run()
    }
3037
3038
    /// The column that is constant in one input sits in the middle of the
    /// other input's ordering:
    ///
    /// * first input: ordering [a ASC, b ASC, d ASC], constants [c]
    /// * second input: ordering [a ASC, c ASC, d ASC], constants [b]
    ///
    /// Expected for the union: orderings [a, c, b, d] and [a, b, c, d], with
    /// no constants.
    #[test]
    #[ignore]
    // ignored due to https://github.com/apache/datafusion/issues/12446
    fn test_union_equivalence_properties_constants_middle() {
        let schema = create_test_schema().unwrap();

        let test = UnionEquivalenceTest::new(&schema)
            .with_child_sort_and_const_exprs(
                vec![vec!["a", "b", "d"]],
                vec!["c"],
                &schema,
            )
            .with_child_sort_and_const_exprs(
                vec![vec!["a", "c", "d"]],
                vec!["b"],
                &schema,
            )
            .with_expected_sort_and_const_exprs(
                vec![vec!["a", "c", "b", "d"], vec!["a", "b", "c", "d"]],
                vec![],
            );
        test.run()
    }
3065
3066
    /// Same layout as `test_union_equivalence_properties_constants_middle`,
    /// except that `b` is sorted descending in the first input:
    ///
    /// * first input: ordering [a ASC, b DESC, d ASC], constants [c]
    /// * second input: ordering [a ASC, c ASC, d ASC], constants [b]
    ///
    /// Expected for the union: orderings [a, c, b DESC, d] and
    /// [a, b DESC, c, d], with no constants.
    #[test]
    #[ignore]
    // ignored due to https://github.com/apache/datafusion/issues/12446
    fn test_union_equivalence_properties_constants_middle_desc() {
        let schema = create_test_schema().unwrap();

        let test = UnionEquivalenceTest::new(&schema)
            // NB: `b DESC` here
            .with_child_sort_and_const_exprs(
                vec![vec!["a", "b DESC", "d"]],
                vec!["c"],
                &schema,
            )
            .with_child_sort_and_const_exprs(
                vec![vec!["a", "c", "d"]],
                vec!["b"],
                &schema,
            )
            .with_expected_sort_and_const_exprs(
                vec![vec!["a", "c", "b DESC", "d"], vec!["a", "b DESC", "c", "d"]],
                vec![],
            );
        test.run()
    }
3095
3096
    // TODO tests with multiple constants
3097
3098
    #[derive(Debug)]
3099
    struct UnionEquivalenceTest {
3100
        /// The schema of the output of the Union
3101
        output_schema: SchemaRef,
3102
        /// The equivalence properties of each child to the union
3103
        child_properties: Vec<EquivalenceProperties>,
3104
        /// The expected output properties of the union. Must be set before
3105
        /// running `build`
3106
        expected_properties: Option<EquivalenceProperties>,
3107
    }
3108
3109
    impl UnionEquivalenceTest {
3110
        fn new(output_schema: &SchemaRef) -> Self {
3111
            Self {
3112
                output_schema: Arc::clone(output_schema),
3113
                child_properties: vec![],
3114
                expected_properties: None,
3115
            }
3116
        }
3117
3118
        /// Add a union input with the specified orderings
3119
        ///
3120
        /// See [`Self::make_props`] for the format of the strings in `orderings`
3121
        fn with_child_sort(
3122
            mut self,
3123
            orderings: Vec<Vec<&str>>,
3124
            schema: &SchemaRef,
3125
        ) -> Self {
3126
            let properties = self.make_props(orderings, vec![], schema);
3127
            self.child_properties.push(properties);
3128
            self
3129
        }
3130
3131
        /// Add a union input with the specified orderings and constant
3132
        /// equivalences
3133
        ///
3134
        /// See [`Self::make_props`] for the format of the strings in
3135
        /// `orderings` and `constants`
3136
        fn with_child_sort_and_const_exprs(
3137
            mut self,
3138
            orderings: Vec<Vec<&str>>,
3139
            constants: Vec<&str>,
3140
            schema: &SchemaRef,
3141
        ) -> Self {
3142
            let properties = self.make_props(orderings, constants, schema);
3143
            self.child_properties.push(properties);
3144
            self
3145
        }
3146
3147
        /// Set the expected output sort order for the union of the children
3148
        ///
3149
        /// See [`Self::make_props`] for the format of the strings in `orderings`
3150
        fn with_expected_sort(mut self, orderings: Vec<Vec<&str>>) -> Self {
3151
            let properties = self.make_props(orderings, vec![], &self.output_schema);
3152
            self.expected_properties = Some(properties);
3153
            self
3154
        }
3155
3156
        /// Set the expected output sort order and constant expressions for the
3157
        /// union of the children
3158
        ///
3159
        /// See [`Self::make_props`] for the format of the strings in
3160
        /// `orderings` and `constants`.
3161
        fn with_expected_sort_and_const_exprs(
3162
            mut self,
3163
            orderings: Vec<Vec<&str>>,
3164
            constants: Vec<&str>,
3165
        ) -> Self {
3166
            let properties = self.make_props(orderings, constants, &self.output_schema);
3167
            self.expected_properties = Some(properties);
3168
            self
3169
        }
3170
3171
        /// compute the union's output equivalence properties from the child
3172
        /// properties, and compare them to the expected properties
3173
        fn run(self) {
3174
            let Self {
3175
                output_schema,
3176
                child_properties,
3177
                expected_properties,
3178
            } = self;
3179
            let expected_properties =
3180
                expected_properties.expect("expected_properties not set");
3181
            let actual_properties =
3182
                calculate_union(child_properties, Arc::clone(&output_schema))
3183
                    .expect("failed to calculate union equivalence properties");
3184
            assert_eq_properties_same(
3185
                &actual_properties,
3186
                &expected_properties,
3187
                format!(
3188
                    "expected: {expected_properties:?}\nactual:  {actual_properties:?}"
3189
                ),
3190
            );
3191
        }
3192
3193
        /// Make equivalence properties for the specified columns named in orderings and constants
3194
        ///
3195
        /// orderings: strings formatted like `"a"` or `"a DESC"`. See [`parse_sort_expr`]
3196
        /// constants: strings formatted like `"a"`.
3197
        fn make_props(
3198
            &self,
3199
            orderings: Vec<Vec<&str>>,
3200
            constants: Vec<&str>,
3201
            schema: &SchemaRef,
3202
        ) -> EquivalenceProperties {
3203
            let orderings = orderings
3204
                .iter()
3205
                .map(|ordering| {
3206
                    ordering
3207
                        .iter()
3208
                        .map(|name| parse_sort_expr(name, schema))
3209
                        .collect::<Vec<_>>()
3210
                })
3211
                .collect::<Vec<_>>();
3212
3213
            let constants = constants
3214
                .iter()
3215
                .map(|col_name| ConstExpr::new(col(col_name, schema).unwrap()))
3216
                .collect::<Vec<_>>();
3217
3218
            EquivalenceProperties::new_with_orderings(Arc::clone(schema), &orderings)
3219
                .with_constants(constants)
3220
        }
3221
    }
3222
3223
    fn assert_eq_properties_same(
3224
        lhs: &EquivalenceProperties,
3225
        rhs: &EquivalenceProperties,
3226
        err_msg: String,
3227
    ) {
3228
        // Check whether constants are same
3229
        let lhs_constants = lhs.constants();
3230
        let rhs_constants = rhs.constants();
3231
        for rhs_constant in rhs_constants {
3232
            assert!(
3233
                const_exprs_contains(lhs_constants, rhs_constant.expr()),
3234
                "{err_msg}\nlhs: {lhs}\nrhs: {rhs}"
3235
            );
3236
        }
3237
        assert_eq!(
3238
            lhs_constants.len(),
3239
            rhs_constants.len(),
3240
            "{err_msg}\nlhs: {lhs}\nrhs: {rhs}"
3241
        );
3242
3243
        // Check whether orderings are same.
3244
        let lhs_orderings = lhs.oeq_class();
3245
        let rhs_orderings = &rhs.oeq_class.orderings;
3246
        for rhs_ordering in rhs_orderings {
3247
            assert!(
3248
                lhs_orderings.contains(rhs_ordering),
3249
                "{err_msg}\nlhs: {lhs}\nrhs: {rhs}"
3250
            );
3251
        }
3252
        assert_eq!(
3253
            lhs_orderings.len(),
3254
            rhs_orderings.len(),
3255
            "{err_msg}\nlhs: {lhs}\nrhs: {rhs}"
3256
        );
3257
    }
3258
3259
    /// Converts a string to a physical sort expression
3260
    ///
3261
    /// # Example
3262
    /// * `"a"` -> (`"a"`, `SortOptions::default()`)
3263
    /// * `"a ASC"` -> (`"a"`, `SortOptions { descending: false, nulls_first: false }`)
3264
    fn parse_sort_expr(name: &str, schema: &SchemaRef) -> PhysicalSortExpr {
3265
        let mut parts = name.split_whitespace();
3266
        let name = parts.next().expect("empty sort expression");
3267
        let mut sort_expr = PhysicalSortExpr::new(
3268
            col(name, schema).expect("invalid column name"),
3269
            SortOptions::default(),
3270
        );
3271
3272
        if let Some(options) = parts.next() {
3273
            sort_expr = match options {
3274
                "ASC" => sort_expr.asc(),
3275
                "DESC" => sort_expr.desc(),
3276
                _ => panic!(
3277
                    "unknown sort options. Expected 'ASC' or 'DESC', got {}",
3278
                    options
3279
                ),
3280
            }
3281
        }
3282
3283
        assert!(
3284
            parts.next().is_none(),
3285
            "unexpected tokens in column name. Expected 'name' / 'name ASC' / 'name DESC' but got  '{name}'"
3286
        );
3287
3288
        sort_expr
3289
    }
3290
}