Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-expr-common/src/sort_expr.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Sort expressions
19
20
use std::fmt::{Display, Formatter};
21
use std::hash::{Hash, Hasher};
22
use std::ops::Deref;
23
use std::sync::Arc;
24
25
use crate::physical_expr::PhysicalExpr;
26
27
use arrow::compute::kernels::sort::{SortColumn, SortOptions};
28
use arrow::datatypes::Schema;
29
use arrow::record_batch::RecordBatch;
30
use datafusion_common::Result;
31
use datafusion_expr_common::columnar_value::ColumnarValue;
32
33
/// Represents Sort operation for a column in a RecordBatch
34
///
35
/// Example:
36
/// ```
37
/// # use std::any::Any;
38
/// # use std::fmt::Display;
39
/// # use std::hash::Hasher;
40
/// # use std::sync::Arc;
41
/// # use arrow::array::RecordBatch;
42
/// # use datafusion_common::Result;
43
/// # use arrow::compute::SortOptions;
44
/// # use arrow::datatypes::{DataType, Schema};
45
/// # use datafusion_expr_common::columnar_value::ColumnarValue;
46
/// # use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
47
/// # use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr;
48
/// # // this crate doesn't have a physical expression implementation
49
/// # // so make a really simple one
50
/// # #[derive(Clone, Debug, PartialEq, Eq, Hash)]
51
/// # struct MyPhysicalExpr;
52
/// # impl PhysicalExpr for MyPhysicalExpr {
53
/// #  fn as_any(&self) -> &dyn Any {todo!() }
54
/// #  fn data_type(&self, input_schema: &Schema) -> Result<DataType> {todo!()}
55
/// #  fn nullable(&self, input_schema: &Schema) -> Result<bool> {todo!() }
56
/// #  fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {todo!() }
57
/// #  fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {todo!()}
58
/// #  fn with_new_children(self: Arc<Self>, children: Vec<Arc<dyn PhysicalExpr>>) -> Result<Arc<dyn PhysicalExpr>> {todo!()}
59
/// #  fn dyn_hash(&self, _state: &mut dyn Hasher) {todo!()}
60
/// # }
61
/// # impl Display for MyPhysicalExpr {
62
/// #    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "a") }
63
/// # }
64
/// # impl PartialEq<dyn Any> for MyPhysicalExpr {
65
/// #    fn eq(&self, _other: &dyn Any) -> bool { true }
66
/// # }
67
/// # fn col(name: &str) -> Arc<dyn PhysicalExpr> { Arc::new(MyPhysicalExpr) }
68
/// // Sort by a ASC
69
/// let options = SortOptions::default();
70
/// let sort_expr = PhysicalSortExpr::new(col("a"), options);
71
/// assert_eq!(sort_expr.to_string(), "a ASC");
72
///
73
/// // Sort by a DESC NULLS LAST
74
/// let sort_expr = PhysicalSortExpr::new_default(col("a"))
75
///   .desc()
76
///   .nulls_last();
77
/// assert_eq!(sort_expr.to_string(), "a DESC NULLS LAST");
78
/// ```
79
#[derive(Clone, Debug)]
80
pub struct PhysicalSortExpr {
81
    /// Physical expression representing the column to sort
82
    pub expr: Arc<dyn PhysicalExpr>,
83
    /// Option to specify how the given column should be sorted
84
    pub options: SortOptions,
85
}
86
87
impl PhysicalSortExpr {
88
    /// Create a new PhysicalSortExpr
89
2.27k
    pub fn new(expr: Arc<dyn PhysicalExpr>, options: SortOptions) -> Self {
90
2.27k
        Self { expr, options }
91
2.27k
    }
92
93
    /// Create a new PhysicalSortExpr with default [`SortOptions`]
94
4
    pub fn new_default(expr: Arc<dyn PhysicalExpr>) -> Self {
95
4
        Self::new(expr, SortOptions::default())
96
4
    }
97
98
    /// Set the sort options to ASC
99
0
    pub fn asc(mut self) -> Self {
100
0
        self.options.descending = false;
101
0
        self
102
0
    }
103
104
    /// Set the sort options to DESC
105
0
    pub fn desc(mut self) -> Self {
106
0
        self.options.descending = true;
107
0
        self
108
0
    }
109
110
    /// Set the sort options to NULLS FIRST
111
0
    pub fn nulls_first(mut self) -> Self {
112
0
        self.options.nulls_first = true;
113
0
        self
114
0
    }
115
116
    /// Set the sort options to NULLS LAST
117
0
    pub fn nulls_last(mut self) -> Self {
118
0
        self.options.nulls_first = false;
119
0
        self
120
0
    }
121
}
122
123
impl PartialEq for PhysicalSortExpr {
124
272
    fn eq(&self, other: &PhysicalSortExpr) -> bool {
125
272
        self.options == other.options && self.expr.eq(&other.expr)
126
272
    }
127
}
128
129
impl Eq for PhysicalSortExpr {}
130
131
impl Hash for PhysicalSortExpr {
132
19
    fn hash<H: Hasher>(&self, state: &mut H) {
133
19
        self.expr.hash(state);
134
19
        self.options.hash(state);
135
19
    }
136
}
137
138
impl Display for PhysicalSortExpr {
139
5
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
140
5
        write!(f, "{} {}", self.expr, to_str(&self.options))
141
5
    }
142
}
143
144
impl PhysicalSortExpr {
145
    /// evaluate the sort expression into SortColumn that can be passed into arrow sort kernel
146
144
    pub fn evaluate_to_sort_column(&self, batch: &RecordBatch) -> Result<SortColumn> {
147
144
        let value_to_sort = self.expr.evaluate(batch)
?0
;
148
144
        let array_to_sort = match value_to_sort {
149
143
            ColumnarValue::Array(array) => array,
150
1
            ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(batch.num_rows())
?0
,
151
        };
152
144
        Ok(SortColumn {
153
144
            values: array_to_sort,
154
144
            options: Some(self.options),
155
144
        })
156
144
    }
157
158
    /// Checks whether this sort expression satisfies the given `requirement`.
159
    /// If sort options are unspecified in `requirement`, only expressions are
160
    /// compared for equality.
161
88
    pub fn satisfy(
162
88
        &self,
163
88
        requirement: &PhysicalSortRequirement,
164
88
        schema: &Schema,
165
88
    ) -> bool {
166
88
        // If the column is not nullable, NULLS FIRST/LAST is not important.
167
88
        let nullable = self.expr.nullable(schema).unwrap_or(true);
168
88
        self.expr.eq(&requirement.expr)
169
88
            && if nullable {
170
58
                requirement
171
58
                    .options
172
58
                    .map_or(true, |opts| self.options == opts)
173
            } else {
174
30
                requirement
175
30
                    .options
176
30
                    .map_or(true, |opts| self.options.descending == opts.descending)
177
            }
178
88
    }
179
180
    /// Returns a [`Display`]able list of `PhysicalSortExpr`.
181
4
    pub fn format_list(input: &[PhysicalSortExpr]) -> impl Display + '_ {
182
        struct DisplayableList<'a>(&'a [PhysicalSortExpr]);
183
        impl<'a> Display for DisplayableList<'a> {
184
4
            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
185
4
                let mut first = true;
186
8
                for 
sort_expr4
in self.0 {
187
4
                    if first {
188
4
                        first = false;
189
4
                    } else {
190
0
                        write!(f, ",")?;
191
                    }
192
4
                    write!(f, "{}", sort_expr)
?0
;
193
                }
194
4
                Ok(())
195
4
            }
196
        }
197
4
        DisplayableList(input)
198
4
    }
199
}
200
201
/// Represents sort requirement associated with a plan
202
///
203
/// If the requirement includes [`SortOptions`] then both the
204
/// expression *and* the sort options must match.
205
///
206
/// If the requirement does not include [`SortOptions`] then only the
207
/// expressions must match.
208
///
209
/// # Examples
210
///
211
/// With sort options (`A`, `DESC NULLS FIRST`):
212
/// * `ORDER BY A DESC NULLS FIRST` matches
213
/// * `ORDER BY A ASC  NULLS FIRST` does not match (`ASC` vs `DESC`)
214
/// * `ORDER BY B DESC NULLS FIRST` does not match (different expr)
215
///
216
/// Without sort options (`A`, None):
217
/// * `ORDER BY A DESC NULLS FIRST` matches
218
/// * `ORDER BY A ASC  NULLS FIRST` matches (`ASC` and `NULL` options ignored)
219
/// * `ORDER BY B DESC NULLS FIRST` does not match  (different expr)
220
#[derive(Clone, Debug)]
221
pub struct PhysicalSortRequirement {
222
    /// Physical expression representing the column to sort
223
    pub expr: Arc<dyn PhysicalExpr>,
224
    /// Option to specify how the given column should be sorted.
225
    /// If unspecified, there are no constraints on sort options.
226
    pub options: Option<SortOptions>,
227
}
228
229
impl From<PhysicalSortRequirement> for PhysicalSortExpr {
230
    /// If `options` is `None`, the default sort options `ASC, NULLS LAST` are used.
231
    ///
232
    /// The default is picked to be consistent with
233
    /// PostgreSQL: <https://www.postgresql.org/docs/current/queries-order.html>    
234
2.14k
    fn from(value: PhysicalSortRequirement) -> Self {
235
2.14k
        let options = value.options.unwrap_or(SortOptions {
236
2.14k
            descending: false,
237
2.14k
            nulls_first: false,
238
2.14k
        });
239
2.14k
        PhysicalSortExpr::new(value.expr, options)
240
2.14k
    }
241
}
242
243
impl From<PhysicalSortExpr> for PhysicalSortRequirement {
244
3.48k
    fn from(value: PhysicalSortExpr) -> Self {
245
3.48k
        PhysicalSortRequirement::new(value.expr, Some(value.options))
246
3.48k
    }
247
}
248
249
impl PartialEq for PhysicalSortRequirement {
250
9
    fn eq(&self, other: &PhysicalSortRequirement) -> bool {
251
9
        self.options == other.options && self.expr.eq(&other.expr)
252
9
    }
253
}
254
255
impl Display for PhysicalSortRequirement {
256
0
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
257
0
        let opts_string = self.options.as_ref().map_or("NA", to_str);
258
0
        write!(f, "{} {}", self.expr, opts_string)
259
0
    }
260
}
261
262
/// Writes a list of [`PhysicalSortRequirement`]s to a `std::fmt::Formatter`.
263
///
264
/// Example output: `[a + 1 ASC, b NA]` (`NA` is printed when no sort options are set)
265
0
pub fn format_physical_sort_requirement_list(
266
0
    exprs: &[PhysicalSortRequirement],
267
0
) -> impl Display + '_ {
268
    struct DisplayWrapper<'a>(&'a [PhysicalSortRequirement]);
269
    impl<'a> Display for DisplayWrapper<'a> {
270
0
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
271
0
            let mut iter = self.0.iter();
272
0
            write!(f, "[")?;
273
0
            if let Some(expr) = iter.next() {
274
0
                write!(f, "{}", expr)?;
275
0
            }
276
0
            for expr in iter {
277
0
                write!(f, ", {}", expr)?;
278
            }
279
0
            write!(f, "]")?;
280
0
            Ok(())
281
0
        }
282
    }
283
0
    DisplayWrapper(exprs)
284
0
}
285
286
impl PhysicalSortRequirement {
287
    /// Creates a new requirement.
288
    ///
289
    /// If `options` is `Some(..)`, creates an `exact` requirement,
290
    /// which must match both `options` and `expr`.
291
    ///
292
    /// If `options` is `None`, creates a new `expr_only` requirement,
293
    /// which must match only `expr`.
294
    ///
295
    /// See [`PhysicalSortRequirement`] for examples.
296
3.49k
    pub fn new(expr: Arc<dyn PhysicalExpr>, options: Option<SortOptions>) -> Self {
297
3.49k
        Self { expr, options }
298
3.49k
    }
299
300
    /// Replace the required expression for this requirement with the new one
301
0
    pub fn with_expr(mut self, expr: Arc<dyn PhysicalExpr>) -> Self {
302
0
        self.expr = expr;
303
0
        self
304
0
    }
305
306
    /// Returns whether this requirement is equal or more specific than `other`.
307
0
    pub fn compatible(&self, other: &PhysicalSortRequirement) -> bool {
308
0
        self.expr.eq(&other.expr)
309
0
            && other.options.map_or(true, |other_opts| {
310
0
                self.options.map_or(false, |opts| opts == other_opts)
311
0
            })
312
0
    }
313
314
    /// Returns [`PhysicalSortRequirement`] that requires the exact
315
    /// sort of the [`PhysicalSortExpr`]s in `ordering`
316
    ///
317
    /// This method takes `&'a PhysicalSortExpr` to make it easy to
318
    /// use when implementing [`ExecutionPlan::required_input_ordering`].
319
    ///
320
    /// [`ExecutionPlan::required_input_ordering`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#method.required_input_ordering
321
1.36k
    pub fn from_sort_exprs<'a>(
322
1.36k
        ordering: impl IntoIterator<Item = &'a PhysicalSortExpr>,
323
1.36k
    ) -> LexRequirement {
324
1.36k
        LexRequirement::new(
325
1.36k
            ordering
326
1.36k
                .into_iter()
327
1.36k
                .cloned()
328
1.36k
                .map(PhysicalSortRequirement::from)
329
1.36k
                .collect(),
330
1.36k
        )
331
1.36k
    }
332
333
    /// Converts an iterator of [`PhysicalSortRequirement`] into a Vec
334
    /// of [`PhysicalSortExpr`]s.
335
    ///
336
    /// This function converts `PhysicalSortRequirement` to `PhysicalSortExpr`
337
    /// for each entry in the input. If required ordering is None for an entry
338
    /// the default ordering `ASC, NULLS LAST` is used (see `PhysicalSortExpr::from`).
339
1.00k
    pub fn to_sort_exprs(
340
1.00k
        requirements: impl IntoIterator<Item = PhysicalSortRequirement>,
341
1.00k
    ) -> Vec<PhysicalSortExpr> {
342
1.00k
        requirements
343
1.00k
            .into_iter()
344
1.00k
            .map(PhysicalSortExpr::from)
345
1.00k
            .collect()
346
1.00k
    }
347
}
348
349
/// Returns the SQL string representation of the given [SortOptions] object.
350
#[inline]
351
5
fn to_str(options: &SortOptions) -> &str {
352
5
    match (options.descending, options.nulls_first) {
353
0
        (true, true) => "DESC",
354
0
        (true, false) => "DESC NULLS LAST",
355
4
        (false, true) => "ASC",
356
1
        (false, false) => "ASC NULLS LAST",
357
    }
358
5
}
359
360
/// `LexOrdering` is an alias for the type `Vec<PhysicalSortExpr>`, which represents
361
/// a lexicographical ordering.
362
pub type LexOrdering = Vec<PhysicalSortExpr>;
363
364
/// `LexOrderingRef` is an alias for the type `&[PhysicalSortExpr]`, which represents
365
/// a reference to a lexicographical ordering.
366
pub type LexOrderingRef<'a> = &'a [PhysicalSortExpr];
367
368
/// `LexRequirement` is a struct containing a `Vec<PhysicalSortRequirement>`, which
369
/// represents a lexicographical ordering requirement.
370
#[derive(Debug, Default, Clone, PartialEq)]
371
pub struct LexRequirement {
372
    pub inner: Vec<PhysicalSortRequirement>,
373
}
374
375
impl LexRequirement {
376
6.74k
    pub fn new(inner: Vec<PhysicalSortRequirement>) -> Self {
377
6.74k
        Self { inner }
378
6.74k
    }
379
380
1.26k
    pub fn iter(&self) -> impl Iterator<Item = &PhysicalSortRequirement> {
381
1.26k
        self.inner.iter()
382
1.26k
    }
383
384
9
    pub fn push(&mut self, physical_sort_requirement: PhysicalSortRequirement) {
385
9
        self.inner.push(physical_sort_requirement)
386
9
    }
387
}
388
389
impl Deref for LexRequirement {
390
    type Target = [PhysicalSortRequirement];
391
392
1.36k
    fn deref(&self) -> &Self::Target {
393
1.36k
        self.inner.as_slice()
394
1.36k
    }
395
}
396
397
impl FromIterator<PhysicalSortRequirement> for LexRequirement {
398
1.25k
    fn from_iter<T: IntoIterator<Item = PhysicalSortRequirement>>(iter: T) -> Self {
399
1.25k
        let mut lex_requirement = LexRequirement::new(vec![]);
400
401
3.58k
        for 
i2.33k
in iter {
402
2.33k
            lex_requirement.inner.push(i);
403
2.33k
        }
404
405
1.25k
        lex_requirement
406
1.25k
    }
407
}
408
409
impl IntoIterator for LexRequirement {
410
    type Item = PhysicalSortRequirement;
411
    type IntoIter = std::vec::IntoIter<Self::Item>;
412
413
3.89k
    fn into_iter(self) -> Self::IntoIter {
414
3.89k
        self.inner.into_iter()
415
3.89k
    }
416
}
417
418
/// `LexRequirementRef` is an alias for the type `&[PhysicalSortRequirement]`, which
419
/// represents a reference to a lexicographical ordering requirement.
420
pub type LexRequirementRef<'a> = &'a [PhysicalSortRequirement];