Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-expr/src/equivalence/ordering.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::fmt::Display;
19
use std::hash::Hash;
20
use std::sync::Arc;
21
use std::vec::IntoIter;
22
23
use crate::equivalence::add_offset_to_expr;
24
use crate::{LexOrdering, PhysicalExpr, PhysicalSortExpr};
25
use arrow_schema::SortOptions;
26
27
/// An `OrderingEquivalenceClass` object keeps track of different alternative
28
/// orderings that can describe a schema. For example, consider the following table:
29
///
30
/// ```text
31
/// |a|b|c|d|
32
/// |1|4|3|1|
33
/// |2|3|3|2|
34
/// |3|1|2|2|
35
/// |3|2|1|3|
36
/// ```
37
///
38
/// Here, both `vec![a ASC, b ASC]` and `vec![c DESC, d ASC]` describe the table
39
/// ordering. In this case, we say that these orderings are equivalent.
40
#[derive(Debug, Clone, Eq, PartialEq, Hash, Default)]
41
pub struct OrderingEquivalenceClass {
42
    pub orderings: Vec<LexOrdering>,
43
}
44
45
impl OrderingEquivalenceClass {
46
    /// Creates new empty ordering equivalence class.
47
1.21k
    pub fn empty() -> Self {
48
1.21k
        Default::default()
49
1.21k
    }
50
51
    /// Clears (empties) this ordering equivalence class.
52
80
    pub fn clear(&mut self) {
53
80
        self.orderings.clear();
54
80
    }
55
56
    /// Creates new ordering equivalence class from the given orderings.
57
3.61k
    pub fn new(orderings: Vec<LexOrdering>) -> Self {
58
3.61k
        let mut result = Self { orderings };
59
3.61k
        result.remove_redundant_entries();
60
3.61k
        result
61
3.61k
    }
62
63
    /// Checks whether `ordering` is a member of this equivalence class.
64
4
    pub fn contains(&self, ordering: &LexOrdering) -> bool {
65
4
        self.orderings.contains(ordering)
66
4
    }
67
68
    /// Adds `ordering` to this equivalence class.
69
    #[allow(dead_code)]
70
0
    fn push(&mut self, ordering: LexOrdering) {
71
0
        self.orderings.push(ordering);
72
0
        // Make sure that there are no redundant orderings:
73
0
        self.remove_redundant_entries();
74
0
    }
75
76
    /// Checks whether this ordering equivalence class is empty.
77
0
    pub fn is_empty(&self) -> bool {
78
0
        self.len() == 0
79
0
    }
80
81
    /// Returns an iterator over the equivalent orderings in this class.
82
3.37k
    pub fn iter(&self) -> impl Iterator<Item = &LexOrdering> {
83
3.37k
        self.orderings.iter()
84
3.37k
    }
85
86
    /// Returns how many equivalent orderings there are in this class.
87
124
    pub fn len(&self) -> usize {
88
124
        self.orderings.len()
89
124
    }
90
91
    /// Extend this ordering equivalence class with the `other` class.
92
457
    pub fn extend(&mut self, other: Self) {
93
457
        self.orderings.extend(other.orderings);
94
457
        // Make sure that there are no redundant orderings:
95
457
        self.remove_redundant_entries();
96
457
    }
97
98
    /// Adds new orderings into this ordering equivalence class.
99
340
    pub fn add_new_orderings(
100
340
        &mut self,
101
340
        orderings: impl IntoIterator<Item = LexOrdering>,
102
340
    ) {
103
340
        self.orderings.extend(orderings);
104
340
        // Make sure that there are no redundant orderings:
105
340
        self.remove_redundant_entries();
106
340
    }
107
108
    /// Adds a single ordering to the existing ordering equivalence class.
109
0
    pub fn add_new_ordering(&mut self, ordering: LexOrdering) {
110
0
        self.add_new_orderings([ordering]);
111
0
    }
112
113
    /// Removes redundant orderings from this equivalence class. For instance,
114
    /// if we already have the ordering `[a ASC, b ASC, c DESC]`, then there is
115
    /// no need to keep ordering `[a ASC, b ASC]` in the state.
116
4.41k
    fn remove_redundant_entries(&mut self) {
117
4.41k
        let mut work = true;
118
8.83k
        while work {
119
4.41k
            work = false;
120
4.41k
            let mut idx = 0;
121
6.54k
            while idx < self.orderings.len() {
122
2.12k
                let mut ordering_idx = idx + 1;
123
2.12k
                let mut removal = self.orderings[idx].is_empty();
124
2.20k
                while ordering_idx < self.orderings.len() {
125
83
                    work |= resolve_overlap(&mut self.orderings, idx, ordering_idx);
126
83
                    if self.orderings[idx].is_empty() {
127
6
                        removal = true;
128
6
                        break;
129
77
                    }
130
77
                    work |= resolve_overlap(&mut self.orderings, ordering_idx, idx);
131
77
                    if self.orderings[ordering_idx].is_empty() {
132
3
                        self.orderings.swap_remove(ordering_idx);
133
74
                    } else {
134
74
                        ordering_idx += 1;
135
74
                    }
136
                }
137
2.12k
                if removal {
138
13
                    self.orderings.swap_remove(idx);
139
2.11k
                } else {
140
2.11k
                    idx += 1;
141
2.11k
                }
142
            }
143
        }
144
4.41k
    }
145
146
    /// Returns the concatenation of all the orderings. This enables merge
147
    /// operations to preserve all equivalent orderings simultaneously.
148
4.73k
    pub fn output_ordering(&self) -> Option<LexOrdering> {
149
4.73k
        let output_ordering = self.orderings.iter().flatten().cloned().collect();
150
4.73k
        let output_ordering = collapse_lex_ordering(output_ordering);
151
4.73k
        (!output_ordering.is_empty()).then_some(output_ordering)
152
4.73k
    }
153
154
    // Append orderings in `other` to all existing orderings in this equivalence
155
    // class.
156
122
    pub fn join_suffix(mut self, other: &Self) -> Self {
157
122
        let n_ordering = self.orderings.len();
158
122
        // Replicate entries before cross product
159
122
        let n_cross = std::cmp::max(n_ordering, other.len() * n_ordering);
160
122
        self.orderings = self
161
122
            .orderings
162
122
            .iter()
163
122
            .cloned()
164
122
            .cycle()
165
122
            .take(n_cross)
166
122
            .collect();
167
        // Suffix orderings of other to the current orderings.
168
122
        for (
outer_idx, ordering34
) in other.iter().enumerate() {
169
34
            for idx in 0..n_ordering {
170
34
                // Calculate cross product index
171
34
                let idx = outer_idx * n_ordering + idx;
172
34
                self.orderings[idx].extend(ordering.iter().cloned());
173
34
            }
174
        }
175
122
        self
176
122
    }
177
178
    /// Adds `offset` value to the index of each expression inside this
179
    /// ordering equivalence class.
180
237
    pub fn add_offset(&mut self, offset: usize) {
181
237
        for 
ordering86
in self.orderings.iter_mut() {
182
208
            for 
sort_expr122
in ordering {
183
122
                sort_expr.expr = add_offset_to_expr(Arc::clone(&sort_expr.expr), offset);
184
122
            }
185
        }
186
237
    }
187
188
    /// Gets sort options associated with this expression if it is a leading
189
    /// ordering expression. Otherwise, returns `None`.
190
465
    pub fn get_options(&self, expr: &Arc<dyn PhysicalExpr>) -> Option<SortOptions> {
191
465
        for 
ordering277
in self.iter() {
192
277
            let leading_ordering = &ordering[0];
193
277
            if leading_ordering.expr.eq(expr) {
194
150
                return Some(leading_ordering.options);
195
127
            }
196
        }
197
315
        None
198
465
    }
199
}
200
201
impl IntoIterator for OrderingEquivalenceClass {
202
    type Item = LexOrdering;
203
    type IntoIter = IntoIter<LexOrdering>;
204
205
0
    fn into_iter(self) -> Self::IntoIter {
206
0
        self.orderings.into_iter()
207
0
    }
208
}
209
210
/// This function constructs a duplicate-free `LexOrdering` by filtering out
211
/// duplicate entries that have the same physical expression inside. For example,
212
/// `vec![a ASC, a DESC]` collapses to `vec![a ASC]`.
213
4.73k
pub fn collapse_lex_ordering(input: LexOrdering) -> LexOrdering {
214
4.73k
    let mut output = Vec::<PhysicalSortExpr>::new();
215
6.92k
    for 
item2.18k
in input {
216
2.18k
        if !output.iter().any(|req| 
req.expr.eq(&item.expr)351
) {
217
2.18k
            output.push(item);
218
2.18k
        }
2
219
    }
220
4.73k
    output
221
4.73k
}
222
223
/// Trims `orderings[idx]` if some suffix of it overlaps with a prefix of
224
/// `orderings[pre_idx]`. Returns `true` if there is any overlap, `false` otherwise.
225
160
fn resolve_overlap(orderings: &mut [LexOrdering], idx: usize, pre_idx: usize) -> bool {
226
160
    let length = orderings[idx].len();
227
160
    let other_length = orderings[pre_idx].len();
228
201
    for overlap in 1..=
length.min(other_length)160
{
229
201
        if orderings[idx][length - overlap..] == orderings[pre_idx][..overlap] {
230
7
            orderings[idx].truncate(length - overlap);
231
7
            return true;
232
194
        }
233
    }
234
153
    false
235
160
}
236
237
impl Display for OrderingEquivalenceClass {
238
0
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
239
0
        write!(f, "[")?;
240
0
        let mut iter = self.orderings.iter();
241
0
        if let Some(ordering) = iter.next() {
242
0
            write!(f, "[{}]", PhysicalSortExpr::format_list(ordering))?;
243
0
        }
244
0
        for ordering in iter {
245
0
            write!(f, ", [{}]", PhysicalSortExpr::format_list(ordering))?;
246
        }
247
0
        write!(f, "]")?;
248
0
        Ok(())
249
0
    }
250
}
251
252
#[cfg(test)]
253
mod tests {
254
    use std::sync::Arc;
255
256
    use crate::equivalence::tests::{
257
        convert_to_orderings, convert_to_sort_exprs, create_random_schema,
258
        create_test_params, create_test_schema, generate_table_for_eq_properties,
259
        is_table_same_after_sort,
260
    };
261
    use crate::equivalence::{
262
        EquivalenceClass, EquivalenceGroup, EquivalenceProperties,
263
        OrderingEquivalenceClass,
264
    };
265
    use crate::expressions::{col, BinaryExpr, Column};
266
    use crate::utils::tests::TestScalarUDF;
267
    use crate::{ConstExpr, PhysicalExpr, PhysicalSortExpr};
268
269
    use arrow::datatypes::{DataType, Field, Schema};
270
    use arrow_schema::SortOptions;
271
    use datafusion_common::{DFSchema, Result};
272
    use datafusion_expr::{Operator, ScalarUDF};
273
274
    use itertools::Itertools;
275
276
    #[test]
277
    fn test_ordering_satisfy() -> Result<()> {
278
        let input_schema = Arc::new(Schema::new(vec![
279
            Field::new("a", DataType::Int64, true),
280
            Field::new("b", DataType::Int64, true),
281
        ]));
282
        let crude = vec![PhysicalSortExpr {
283
            expr: Arc::new(Column::new("a", 0)),
284
            options: SortOptions::default(),
285
        }];
286
        let finer = vec![
287
            PhysicalSortExpr {
288
                expr: Arc::new(Column::new("a", 0)),
289
                options: SortOptions::default(),
290
            },
291
            PhysicalSortExpr {
292
                expr: Arc::new(Column::new("b", 1)),
293
                options: SortOptions::default(),
294
            },
295
        ];
296
        // Finer ordering satisfies crude ordering; should return true
297
        let mut eq_properties_finer =
298
            EquivalenceProperties::new(Arc::clone(&input_schema));
299
        eq_properties_finer.oeq_class.push(finer.clone());
300
        assert!(eq_properties_finer.ordering_satisfy(&crude));
301
302
        // Crude ordering doesn't satisfy finer ordering; should return false
303
        let mut eq_properties_crude =
304
            EquivalenceProperties::new(Arc::clone(&input_schema));
305
        eq_properties_crude.oeq_class.push(crude);
306
        assert!(!eq_properties_crude.ordering_satisfy(&finer));
307
        Ok(())
308
    }
309
310
    #[test]
311
    fn test_ordering_satisfy_with_equivalence2() -> Result<()> {
312
        let test_schema = create_test_schema()?;
313
        let col_a = &col("a", &test_schema)?;
314
        let col_b = &col("b", &test_schema)?;
315
        let col_c = &col("c", &test_schema)?;
316
        let col_d = &col("d", &test_schema)?;
317
        let col_e = &col("e", &test_schema)?;
318
        let col_f = &col("f", &test_schema)?;
319
        let test_fun = ScalarUDF::new_from_impl(TestScalarUDF::new());
320
        let floor_a = &crate::udf::create_physical_expr(
321
            &test_fun,
322
            &[col("a", &test_schema)?],
323
            &test_schema,
324
            &[],
325
            &DFSchema::empty(),
326
        )?;
327
        let floor_f = &crate::udf::create_physical_expr(
328
            &test_fun,
329
            &[col("f", &test_schema)?],
330
            &test_schema,
331
            &[],
332
            &DFSchema::empty(),
333
        )?;
334
        let exp_a = &crate::udf::create_physical_expr(
335
            &test_fun,
336
            &[col("a", &test_schema)?],
337
            &test_schema,
338
            &[],
339
            &DFSchema::empty(),
340
        )?;
341
        let a_plus_b = Arc::new(BinaryExpr::new(
342
            Arc::clone(col_a),
343
            Operator::Plus,
344
            Arc::clone(col_b),
345
        )) as Arc<dyn PhysicalExpr>;
346
        let options = SortOptions {
347
            descending: false,
348
            nulls_first: false,
349
        };
350
351
        let test_cases = vec![
352
            // ------------ TEST CASE 1 ------------
353
            (
354
                // orderings
355
                vec![
356
                    // [a ASC, d ASC, b ASC]
357
                    vec![(col_a, options), (col_d, options), (col_b, options)],
358
                    // [c ASC]
359
                    vec![(col_c, options)],
360
                ],
361
                // equivalence classes
362
                vec![vec![col_a, col_f]],
363
                // constants
364
                vec![col_e],
365
                // requirement [a ASC, b ASC], requirement is not satisfied.
366
                vec![(col_a, options), (col_b, options)],
367
                // expected: requirement is not satisfied.
368
                false,
369
            ),
370
            // ------------ TEST CASE 2 ------------
371
            (
372
                // orderings
373
                vec![
374
                    // [a ASC, c ASC, b ASC]
375
                    vec![(col_a, options), (col_c, options), (col_b, options)],
376
                    // [d ASC]
377
                    vec![(col_d, options)],
378
                ],
379
                // equivalence classes
380
                vec![vec![col_a, col_f]],
381
                // constants
382
                vec![col_e],
383
                // requirement [floor(a) ASC],
384
                vec![(floor_a, options)],
385
                // expected: requirement is satisfied.
386
                true,
387
            ),
388
            // ------------ TEST CASE 2.1 ------------
389
            (
390
                // orderings
391
                vec![
392
                    // [a ASC, c ASC, b ASC]
393
                    vec![(col_a, options), (col_c, options), (col_b, options)],
394
                    // [d ASC]
395
                    vec![(col_d, options)],
396
                ],
397
                // equivalence classes
398
                vec![vec![col_a, col_f]],
399
                // constants
400
                vec![col_e],
401
                // requirement [floor(f) ASC], (Please note that a=f)
402
                vec![(floor_f, options)],
403
                // expected: requirement is satisfied.
404
                true,
405
            ),
406
            // ------------ TEST CASE 3 ------------
407
            (
408
                // orderings
409
                vec![
410
                    // [a ASC, c ASC, b ASC]
411
                    vec![(col_a, options), (col_c, options), (col_b, options)],
412
                    // [d ASC]
413
                    vec![(col_d, options)],
414
                ],
415
                // equivalence classes
416
                vec![vec![col_a, col_f]],
417
                // constants
418
                vec![col_e],
419
                // requirement [a ASC, c ASC, a+b ASC],
420
                vec![(col_a, options), (col_c, options), (&a_plus_b, options)],
421
                // expected: requirement is satisfied.
422
                true,
423
            ),
424
            // ------------ TEST CASE 4 ------------
425
            (
426
                // orderings
427
                vec![
428
                    // [a ASC, b ASC, c ASC, d ASC]
429
                    vec![
430
                        (col_a, options),
431
                        (col_b, options),
432
                        (col_c, options),
433
                        (col_d, options),
434
                    ],
435
                ],
436
                // equivalence classes
437
                vec![vec![col_a, col_f]],
438
                // constants
439
                vec![col_e],
440
                // requirement [floor(a) ASC, a+b ASC],
441
                vec![(floor_a, options), (&a_plus_b, options)],
442
                // expected: requirement is not satisfied.
443
                false,
444
            ),
445
            // ------------ TEST CASE 5 ------------
446
            (
447
                // orderings
448
                vec![
449
                    // [a ASC, b ASC, c ASC, d ASC]
450
                    vec![
451
                        (col_a, options),
452
                        (col_b, options),
453
                        (col_c, options),
454
                        (col_d, options),
455
                    ],
456
                ],
457
                // equivalence classes
458
                vec![vec![col_a, col_f]],
459
                // constants
460
                vec![col_e],
461
                // requirement [exp(a) ASC, a+b ASC],
462
                vec![(exp_a, options), (&a_plus_b, options)],
463
                // expected: requirement is not satisfied.
464
                // TODO: If we knew that the exp function is a 1-to-1 function,
465
                //  we could deduce that the above requirement is satisfied.
466
                false,
467
            ),
468
            // ------------ TEST CASE 6 ------------
469
            (
470
                // orderings
471
                vec![
472
                    // [a ASC, d ASC, b ASC]
473
                    vec![(col_a, options), (col_d, options), (col_b, options)],
474
                    // [c ASC]
475
                    vec![(col_c, options)],
476
                ],
477
                // equivalence classes
478
                vec![vec![col_a, col_f]],
479
                // constants
480
                vec![col_e],
481
                // requirement [a ASC, d ASC, floor(a) ASC],
482
                vec![(col_a, options), (col_d, options), (floor_a, options)],
483
                // expected: requirement is satisfied.
484
                true,
485
            ),
486
            // ------------ TEST CASE 7 ------------
487
            (
488
                // orderings
489
                vec![
490
                    // [a ASC, c ASC, b ASC]
491
                    vec![(col_a, options), (col_c, options), (col_b, options)],
492
                    // [d ASC]
493
                    vec![(col_d, options)],
494
                ],
495
                // equivalence classes
496
                vec![vec![col_a, col_f]],
497
                // constants
498
                vec![col_e],
499
                // requirement [a ASC, floor(a) ASC, a + b ASC],
500
                vec![(col_a, options), (floor_a, options), (&a_plus_b, options)],
501
                // expected: requirement is not satisfied.
502
                false,
503
            ),
504
            // ------------ TEST CASE 8 ------------
505
            (
506
                // orderings
507
                vec![
508
                    // [a ASC, b ASC, c ASC]
509
                    vec![(col_a, options), (col_b, options), (col_c, options)],
510
                    // [d ASC]
511
                    vec![(col_d, options)],
512
                ],
513
                // equivalence classes
514
                vec![vec![col_a, col_f]],
515
                // constants
516
                vec![col_e],
517
                // requirement [a ASC, c ASC, floor(a) ASC, a + b ASC],
518
                vec![
519
                    (col_a, options),
520
                    (col_c, options),
521
                    (floor_a, options),
522
                    (&a_plus_b, options),
523
                ],
524
                // expected: requirement is not satisfied.
525
                false,
526
            ),
527
            // ------------ TEST CASE 9 ------------
528
            (
529
                // orderings
530
                vec![
531
                    // [a ASC, b ASC, c ASC, d ASC]
532
                    vec![
533
                        (col_a, options),
534
                        (col_b, options),
535
                        (col_c, options),
536
                        (col_d, options),
537
                    ],
538
                ],
539
                // equivalence classes
540
                vec![vec![col_a, col_f]],
541
                // constants
542
                vec![col_e],
543
                // requirement [a ASC, b ASC, c ASC, floor(a) ASC],
544
                vec![
545
                    (col_a, options),
546
                    (col_b, options),
547
                    (col_c, options),
548
                    (floor_a, options),
549
                ],
550
                // expected: requirement is satisfied.
551
                true,
552
            ),
553
            // ------------ TEST CASE 10 ------------
554
            (
555
                // orderings
556
                vec![
557
                    // [d ASC, b ASC]
558
                    vec![(col_d, options), (col_b, options)],
559
                    // [c ASC, a ASC]
560
                    vec![(col_c, options), (col_a, options)],
561
                ],
562
                // equivalence classes
563
                vec![vec![col_a, col_f]],
564
                // constants
565
                vec![col_e],
566
                // requirement [c ASC, d ASC, a + b ASC],
567
                vec![(col_c, options), (col_d, options), (&a_plus_b, options)],
568
                // expected: requirement is satisfied.
569
                true,
570
            ),
571
        ];
572
573
        for (orderings, eq_group, constants, reqs, expected) in test_cases {
574
            let err_msg =
575
                format!("error in test orderings: {orderings:?}, eq_group: {eq_group:?}, constants: {constants:?}, reqs: {reqs:?}, expected: {expected:?}");
576
            let mut eq_properties = EquivalenceProperties::new(Arc::clone(&test_schema));
577
            let orderings = convert_to_orderings(&orderings);
578
            eq_properties.add_new_orderings(orderings);
579
            let eq_group = eq_group
580
                .into_iter()
581
                .map(|eq_class| {
582
                    let eq_classes = eq_class.into_iter().cloned().collect::<Vec<_>>();
583
                    EquivalenceClass::new(eq_classes)
584
                })
585
                .collect::<Vec<_>>();
586
            let eq_group = EquivalenceGroup::new(eq_group);
587
            eq_properties.add_equivalence_group(eq_group);
588
589
            let constants = constants
590
                .into_iter()
591
                .map(|expr| ConstExpr::from(expr).with_across_partitions(true));
592
            eq_properties = eq_properties.with_constants(constants);
593
594
            let reqs = convert_to_sort_exprs(&reqs);
595
            assert_eq!(
596
                eq_properties.ordering_satisfy(&reqs),
597
                expected,
598
                "{}",
599
                err_msg
600
            );
601
        }
602
603
        Ok(())
604
    }
605
606
    #[test]
607
    fn test_ordering_satisfy_with_equivalence() -> Result<()> {
608
        // Schema satisfies following orderings:
609
        // [a ASC], [d ASC, b ASC], [e DESC, f ASC, g ASC]
610
        // and
611
        // Column [a=c] (i.e., they are aliases).
612
        let (test_schema, eq_properties) = create_test_params()?;
613
        let col_a = &col("a", &test_schema)?;
614
        let col_b = &col("b", &test_schema)?;
615
        let col_c = &col("c", &test_schema)?;
616
        let col_d = &col("d", &test_schema)?;
617
        let col_e = &col("e", &test_schema)?;
618
        let col_f = &col("f", &test_schema)?;
619
        let col_g = &col("g", &test_schema)?;
620
        let option_asc = SortOptions {
621
            descending: false,
622
            nulls_first: false,
623
        };
624
        let option_desc = SortOptions {
625
            descending: true,
626
            nulls_first: true,
627
        };
628
        let table_data_with_properties =
629
            generate_table_for_eq_properties(&eq_properties, 625, 5)?;
630
631
        // First element in the tuple stores vector of requirement, second element is the expected return value for ordering_satisfy function
632
        let requirements = vec![
633
            // `a ASC NULLS LAST`, expects `ordering_satisfy` to be `true`, since existing ordering `a ASC NULLS LAST, b ASC NULLS LAST` satisfies it
634
            (vec![(col_a, option_asc)], true),
635
            (vec![(col_a, option_desc)], false),
636
            // Test whether equivalence works as expected
637
            (vec![(col_c, option_asc)], true),
638
            (vec![(col_c, option_desc)], false),
639
            // Test whether ordering equivalence works as expected
640
            (vec![(col_d, option_asc)], true),
641
            (vec![(col_d, option_asc), (col_b, option_asc)], true),
642
            (vec![(col_d, option_desc), (col_b, option_asc)], false),
643
            (
644
                vec![
645
                    (col_e, option_desc),
646
                    (col_f, option_asc),
647
                    (col_g, option_asc),
648
                ],
649
                true,
650
            ),
651
            (vec![(col_e, option_desc), (col_f, option_asc)], true),
652
            (vec![(col_e, option_asc), (col_f, option_asc)], false),
653
            (vec![(col_e, option_desc), (col_b, option_asc)], false),
654
            (vec![(col_e, option_asc), (col_b, option_asc)], false),
655
            (
656
                vec![
657
                    (col_d, option_asc),
658
                    (col_b, option_asc),
659
                    (col_d, option_asc),
660
                    (col_b, option_asc),
661
                ],
662
                true,
663
            ),
664
            (
665
                vec![
666
                    (col_d, option_asc),
667
                    (col_b, option_asc),
668
                    (col_e, option_desc),
669
                    (col_f, option_asc),
670
                ],
671
                true,
672
            ),
673
            (
674
                vec![
675
                    (col_d, option_asc),
676
                    (col_b, option_asc),
677
                    (col_e, option_desc),
678
                    (col_b, option_asc),
679
                ],
680
                true,
681
            ),
682
            (
683
                vec![
684
                    (col_d, option_asc),
685
                    (col_b, option_asc),
686
                    (col_d, option_desc),
687
                    (col_b, option_asc),
688
                ],
689
                true,
690
            ),
691
            (
692
                vec![
693
                    (col_d, option_asc),
694
                    (col_b, option_asc),
695
                    (col_e, option_asc),
696
                    (col_f, option_asc),
697
                ],
698
                false,
699
            ),
700
            (
701
                vec![
702
                    (col_d, option_asc),
703
                    (col_b, option_asc),
704
                    (col_e, option_asc),
705
                    (col_b, option_asc),
706
                ],
707
                false,
708
            ),
709
            (vec![(col_d, option_asc), (col_e, option_desc)], true),
710
            (
711
                vec![
712
                    (col_d, option_asc),
713
                    (col_c, option_asc),
714
                    (col_b, option_asc),
715
                ],
716
                true,
717
            ),
718
            (
719
                vec![
720
                    (col_d, option_asc),
721
                    (col_e, option_desc),
722
                    (col_f, option_asc),
723
                    (col_b, option_asc),
724
                ],
725
                true,
726
            ),
727
            (
728
                vec![
729
                    (col_d, option_asc),
730
                    (col_e, option_desc),
731
                    (col_c, option_asc),
732
                    (col_b, option_asc),
733
                ],
734
                true,
735
            ),
736
            (
737
                vec![
738
                    (col_d, option_asc),
739
                    (col_e, option_desc),
740
                    (col_b, option_asc),
741
                    (col_f, option_asc),
742
                ],
743
                true,
744
            ),
745
        ];
746
747
        for (cols, expected) in requirements {
748
            let err_msg = format!("Error in test case:{cols:?}");
749
            let required = cols
750
                .into_iter()
751
                .map(|(expr, options)| PhysicalSortExpr {
752
                    expr: Arc::clone(expr),
753
                    options,
754
                })
755
                .collect::<Vec<_>>();
756
757
            // Check expected result with experimental result.
758
            assert_eq!(
759
                is_table_same_after_sort(
760
                    required.clone(),
761
                    table_data_with_properties.clone()
762
                )?,
763
                expected
764
            );
765
            assert_eq!(
766
                eq_properties.ordering_satisfy(&required),
767
                expected,
768
                "{err_msg}"
769
            );
770
        }
771
        Ok(())
772
    }
773
774
    /// Cross-checks `ordering_satisfy` against `is_table_same_after_sort` on
    /// randomly generated schemas.
    ///
    /// For every seed, a random schema with its equivalence properties is
    /// created and a table is generated from those properties. Each
    /// combination of the columns `a`..`f` (every size from 0 to 6) is turned
    /// into a sort requirement using ascending, nulls-last options, and the
    /// answer of `ordering_satisfy` must equal the result obtained from
    /// `is_table_same_after_sort` on the generated table.
    #[test]
    fn test_ordering_satisfy_with_equivalence_random() -> Result<()> {
        // Seeds are consumed as `u64`, so the bound is declared as `u64` to
        // avoid a cast at the call site.
        const N_RANDOM_SCHEMA: u64 = 5;
        const N_ELEMENTS: usize = 125;
        const N_DISTINCT: usize = 5;
        const SORT_OPTIONS: SortOptions = SortOptions {
            descending: false,
            nulls_first: false,
        };

        for seed in 0..N_RANDOM_SCHEMA {
            // Create a random schema with random properties.
            let (test_schema, eq_properties) = create_random_schema(seed)?;
            // Generate data that satisfies the given properties.
            let table_data_with_properties =
                generate_table_for_eq_properties(&eq_properties, N_ELEMENTS, N_DISTINCT)?;
            let col_exprs = [
                col("a", &test_schema)?,
                col("b", &test_schema)?,
                col("c", &test_schema)?,
                col("d", &test_schema)?,
                col("e", &test_schema)?,
                col("f", &test_schema)?,
            ];

            for n_req in 0..=col_exprs.len() {
                for combination in col_exprs.iter().combinations(n_req) {
                    let requirement = combination
                        .into_iter()
                        .map(|expr| PhysicalSortExpr {
                            expr: Arc::clone(expr),
                            options: SORT_OPTIONS,
                        })
                        .collect::<Vec<_>>();
                    let expected = is_table_same_after_sort(
                        requirement.clone(),
                        table_data_with_properties.clone(),
                    )?;
                    // Check whether the `ordering_satisfy` API result and the
                    // experimental result match. The message arguments are
                    // handed to `assert_eq!` directly so the (large) debug
                    // output is only formatted when the assertion fails.
                    assert_eq!(
                        eq_properties.ordering_satisfy(&requirement),
                        expected,
                        "Error in test case requirement:{:?}, expected: {:?}, eq_properties.oeq_class: {:?}, eq_properties.eq_group: {:?}, eq_properties.constants: {:?}",
                        requirement, expected, eq_properties.oeq_class, eq_properties.eq_group, eq_properties.constants
                    );
                }
            }
        }

        Ok(())
    }
830
831
    /// Cross-checks `ordering_satisfy` against `is_table_same_after_sort` on
    /// randomly generated schemas, using compound expressions in addition to
    /// plain columns.
    ///
    /// Besides the columns `a`..`f`, the candidate expressions include the
    /// test scalar UDF applied to `a` (`floor_a`) and the binary expression
    /// `a + b`. Every combination of these eight expressions is used as a
    /// sort requirement with ascending, nulls-last options, and the answer of
    /// `ordering_satisfy` must equal the result obtained from
    /// `is_table_same_after_sort` on the generated table.
    #[test]
    fn test_ordering_satisfy_with_equivalence_complex_random() -> Result<()> {
        // Seeds are consumed as `u64`, so the bound is declared as `u64` to
        // avoid a cast at the call site.
        const N_RANDOM_SCHEMA: u64 = 100;
        const N_ELEMENTS: usize = 125;
        const N_DISTINCT: usize = 5;
        const SORT_OPTIONS: SortOptions = SortOptions {
            descending: false,
            nulls_first: false,
        };

        for seed in 0..N_RANDOM_SCHEMA {
            // Create a random schema with random properties.
            let (test_schema, eq_properties) = create_random_schema(seed)?;
            // Generate data that satisfies the given properties.
            let table_data_with_properties =
                generate_table_for_eq_properties(&eq_properties, N_ELEMENTS, N_DISTINCT)?;

            let test_fun = ScalarUDF::new_from_impl(TestScalarUDF::new());
            let floor_a = crate::udf::create_physical_expr(
                &test_fun,
                &[col("a", &test_schema)?],
                &test_schema,
                &[],
                &DFSchema::empty(),
            )?;
            let a_plus_b = Arc::new(BinaryExpr::new(
                col("a", &test_schema)?,
                Operator::Plus,
                col("b", &test_schema)?,
            )) as Arc<dyn PhysicalExpr>;
            let exprs = [
                col("a", &test_schema)?,
                col("b", &test_schema)?,
                col("c", &test_schema)?,
                col("d", &test_schema)?,
                col("e", &test_schema)?,
                col("f", &test_schema)?,
                floor_a,
                a_plus_b,
            ];

            for n_req in 0..=exprs.len() {
                // A distinct name keeps the candidate array `exprs` from
                // being shadowed inside the loop.
                for combination in exprs.iter().combinations(n_req) {
                    let requirement = combination
                        .into_iter()
                        .map(|expr| PhysicalSortExpr {
                            expr: Arc::clone(expr),
                            options: SORT_OPTIONS,
                        })
                        .collect::<Vec<_>>();
                    let expected = is_table_same_after_sort(
                        requirement.clone(),
                        table_data_with_properties.clone(),
                    )?;
                    // Check whether the `ordering_satisfy` API result and the
                    // experimental result match. The message arguments are
                    // handed to `assert_eq!` directly so the (large) debug
                    // output is only formatted when the assertion fails.
                    assert_eq!(
                        eq_properties.ordering_satisfy(&requirement),
                        expected,
                        "Error in test case requirement:{:?}, expected: {:?}, eq_properties.oeq_class: {:?}, eq_properties.eq_group: {:?}, eq_properties.constants: {:?}",
                        requirement, expected, eq_properties.oeq_class, eq_properties.eq_group, eq_properties.constants
                    );
                }
            }
        }

        Ok(())
    }
904
905
    /// Exercises `ordering_satisfy` with requirements that are shorter or
    /// longer than the orderings registered on the equivalence properties,
    /// while `a` and `c` are declared equal.
    #[test]
    fn test_ordering_satisfy_different_lengths() -> Result<()> {
        let schema = create_test_schema()?;
        // Every sort expression in this test is ascending with nulls last.
        let asc = SortOptions {
            descending: false,
            nulls_first: false,
        };
        let col_a = &col("a", &schema)?;
        let col_b = &col("b", &schema)?;
        let col_c = &col("c", &schema)?;
        let col_d = &col("d", &schema)?;
        let col_e = &col("e", &schema)?;
        let col_f = &col("f", &schema)?;

        // Declare a = c (e.g. one is an alias of the other).
        let mut eq_properties = EquivalenceProperties::new(schema);
        eq_properties.add_equal_conditions(col_a, col_c)?;

        // Register [a ASC], [e ASC] and [d ASC, f ASC] as valid orderings
        // for the schema.
        let known_orderings = vec![
            vec![(col_a, asc)],
            vec![(col_e, asc)],
            vec![(col_d, asc), (col_f, asc)],
        ];
        let known_orderings = convert_to_orderings(&known_orderings);
        eq_properties.add_new_orderings(known_orderings);

        // Each case pairs a required ordering with a flag telling whether the
        // requirement is expected to be satisfied by the registered orderings.
        // For instance, ([a ASC], true) would mean that `a ASC` is already
        // satisfied.
        let cases = vec![
            // [c ASC, a ASC, e ASC] is expected to be satisfied.
            (vec![(col_c, asc), (col_a, asc), (col_e, asc)], true),
            (vec![(col_c, asc), (col_b, asc)], false),
            (vec![(col_c, asc), (col_d, asc)], true),
            (vec![(col_d, asc), (col_f, asc), (col_b, asc)], false),
            (vec![(col_d, asc), (col_f, asc)], true),
        ];

        for (reqs, expected) in cases {
            let failure_msg =
                format!("error in test reqs: {:?}, expected: {:?}", reqs, expected,);
            let sort_exprs = convert_to_sort_exprs(&reqs);
            let satisfied = eq_properties.ordering_satisfy(&sort_exprs);
            assert_eq!(satisfied, expected, "{}", failure_msg);
        }

        Ok(())
    }
964
965
    /// Verifies that `OrderingEquivalenceClass::new` stores the orderings
    /// listed as expected for each case below: the stored orderings must
    /// have the same count as the expected ones, and each stored ordering
    /// must appear among them.
    #[test]
    fn test_remove_redundant_entries_oeq_class() -> Result<()> {
        let schema = create_test_schema()?;
        let col_a = &col("a", &schema)?;
        let col_b = &col("b", &schema)?;
        let col_c = &col("c", &schema)?;
        let col_d = &col("d", &schema)?;
        let col_e = &col("e", &schema)?;

        // ASC, nulls last.
        let asc = SortOptions {
            descending: false,
            nulls_first: false,
        };
        // DESC, nulls first.
        let desc = SortOptions {
            descending: true,
            nulls_first: true,
        };

        // Each case is a pair:
        //   1. the orderings given for the table,
        //   2. the simplest functionally equivalent set of orderings.
        let cases = vec![
            // ------- CASE 1 -------
            (
                // Given: [a ASC, b ASC]
                vec![vec![(col_a, asc), (col_b, asc)]],
                // Expected: [a ASC, b ASC]
                vec![vec![(col_a, asc), (col_b, asc)]],
            ),
            // ------- CASE 2 -------
            (
                // Given: [a ASC, b ASC], [a ASC, b ASC, c ASC]
                vec![
                    vec![(col_a, asc), (col_b, asc)],
                    vec![(col_a, asc), (col_b, asc), (col_c, asc)],
                ],
                // Expected: [a ASC, b ASC, c ASC]
                vec![vec![(col_a, asc), (col_b, asc), (col_c, asc)]],
            ),
            // ------- CASE 3 -------
            (
                // Given: [a ASC, b DESC], [a ASC], [a ASC, c ASC]
                vec![
                    vec![(col_a, asc), (col_b, desc)],
                    vec![(col_a, asc)],
                    vec![(col_a, asc), (col_c, asc)],
                ],
                // Expected: [a ASC, b DESC], [a ASC, c ASC]
                vec![
                    vec![(col_a, asc), (col_b, desc)],
                    vec![(col_a, asc), (col_c, asc)],
                ],
            ),
            // ------- CASE 4 -------
            (
                // Given: [a ASC, b ASC], [a ASC, b ASC, c ASC], [a ASC]
                vec![
                    vec![(col_a, asc), (col_b, asc)],
                    vec![(col_a, asc), (col_b, asc), (col_c, asc)],
                    vec![(col_a, asc)],
                ],
                // Expected: [a ASC, b ASC, c ASC]
                vec![vec![(col_a, asc), (col_b, asc), (col_c, asc)]],
            ),
            // ------- CASE 5 -------
            (
                // Given: a single empty ordering
                vec![vec![]],
                // Expected: no ordering in the state (empty ordering is ignored)
                vec![],
            ),
            // ------- CASE 6 -------
            (
                // Given: [a ASC, b ASC], [b ASC]
                vec![vec![(col_a, asc), (col_b, asc)], vec![(col_b, asc)]],
                // Expected: [a ASC], [b ASC]
                vec![vec![(col_a, asc)], vec![(col_b, asc)]],
            ),
            // ------- CASE 7 -------
            (
                // Given: [b ASC, a ASC], [c ASC, a ASC], [d ASC, b ASC, c ASC]
                vec![
                    vec![(col_b, asc), (col_a, asc)],
                    vec![(col_c, asc), (col_a, asc)],
                    vec![(col_d, asc), (col_b, asc), (col_c, asc)],
                ],
                // Expected: [b ASC, a ASC], [c ASC, a ASC], [d ASC]
                vec![
                    vec![(col_b, asc), (col_a, asc)],
                    vec![(col_c, asc), (col_a, asc)],
                    vec![(col_d, asc)],
                ],
            ),
            // ------- CASE 8 -------
            (
                // Given: [b ASC, e ASC], [c ASC, a ASC],
                //        [d ASC, b ASC, e ASC, c ASC, a ASC]
                vec![
                    vec![(col_b, asc), (col_e, asc)],
                    vec![(col_c, asc), (col_a, asc)],
                    vec![
                        (col_d, asc),
                        (col_b, asc),
                        (col_e, asc),
                        (col_c, asc),
                        (col_a, asc),
                    ],
                ],
                // Expected: [b ASC, e ASC], [c ASC, a ASC], [d ASC]
                vec![
                    vec![(col_b, asc), (col_e, asc)],
                    vec![(col_c, asc), (col_a, asc)],
                    vec![(col_d, asc)],
                ],
            ),
            // ------- CASE 9 -------
            (
                // Given: [b ASC], [a ASC, b ASC, c ASC], [d ASC, a ASC, b ASC]
                vec![
                    vec![(col_b, asc)],
                    vec![(col_a, asc), (col_b, asc), (col_c, asc)],
                    vec![(col_d, asc), (col_a, asc), (col_b, asc)],
                ],
                // Expected: [b ASC], [a ASC, b ASC, c ASC], [d ASC]
                vec![
                    vec![(col_b, asc)],
                    vec![(col_a, asc), (col_b, asc), (col_c, asc)],
                    vec![(col_d, asc)],
                ],
            ),
        ];

        for (given, simplified) in cases {
            let given = convert_to_orderings(&given);
            let simplified = convert_to_orderings(&simplified);
            let actual = OrderingEquivalenceClass::new(given.clone()).orderings;
            let failure_msg = format!(
                "orderings: {:?}, expected: {:?}, actual :{:?}",
                given, simplified, actual
            );
            // Same number of orderings, and each produced ordering must be
            // one of the expected ones (order of appearance is irrelevant).
            assert_eq!(actual.len(), simplified.len(), "{}", failure_msg);
            assert!(
                actual.iter().all(|ordering| simplified.contains(ordering)),
                "{}",
                failure_msg
            );
        }

        Ok(())
    }
1202
}