/Users/andrewlamb/Software/datafusion/datafusion/physical-expr-common/src/sort_expr.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Sort expressions |
19 | | |
20 | | use std::fmt::{Display, Formatter}; |
21 | | use std::hash::{Hash, Hasher}; |
22 | | use std::ops::Deref; |
23 | | use std::sync::Arc; |
24 | | |
25 | | use crate::physical_expr::PhysicalExpr; |
26 | | |
27 | | use arrow::compute::kernels::sort::{SortColumn, SortOptions}; |
28 | | use arrow::datatypes::Schema; |
29 | | use arrow::record_batch::RecordBatch; |
30 | | use datafusion_common::Result; |
31 | | use datafusion_expr_common::columnar_value::ColumnarValue; |
32 | | |
33 | | /// Represents Sort operation for a column in a RecordBatch |
34 | | /// |
35 | | /// Example: |
36 | | /// ``` |
37 | | /// # use std::any::Any; |
38 | | /// # use std::fmt::Display; |
39 | | /// # use std::hash::Hasher; |
40 | | /// # use std::sync::Arc; |
41 | | /// # use arrow::array::RecordBatch; |
42 | | /// # use datafusion_common::Result; |
43 | | /// # use arrow::compute::SortOptions; |
44 | | /// # use arrow::datatypes::{DataType, Schema}; |
45 | | /// # use datafusion_expr_common::columnar_value::ColumnarValue; |
46 | | /// # use datafusion_physical_expr_common::physical_expr::PhysicalExpr; |
47 | | /// # use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; |
48 | | /// # // this crate doesn't have a physical expression implementation |
49 | | /// # // so make a really simple one |
50 | | /// # #[derive(Clone, Debug, PartialEq, Eq, Hash)] |
51 | | /// # struct MyPhysicalExpr; |
52 | | /// # impl PhysicalExpr for MyPhysicalExpr { |
53 | | /// # fn as_any(&self) -> &dyn Any {todo!() } |
54 | | /// # fn data_type(&self, input_schema: &Schema) -> Result<DataType> {todo!()} |
55 | | /// # fn nullable(&self, input_schema: &Schema) -> Result<bool> {todo!() } |
56 | | /// # fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {todo!() } |
57 | | /// # fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {todo!()} |
58 | | /// # fn with_new_children(self: Arc<Self>, children: Vec<Arc<dyn PhysicalExpr>>) -> Result<Arc<dyn PhysicalExpr>> {todo!()} |
59 | | /// # fn dyn_hash(&self, _state: &mut dyn Hasher) {todo!()} |
60 | | /// # } |
61 | | /// # impl Display for MyPhysicalExpr { |
62 | | /// # fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "a") } |
63 | | /// # } |
64 | | /// # impl PartialEq<dyn Any> for MyPhysicalExpr { |
65 | | /// # fn eq(&self, _other: &dyn Any) -> bool { true } |
66 | | /// # } |
67 | | /// # fn col(name: &str) -> Arc<dyn PhysicalExpr> { Arc::new(MyPhysicalExpr) } |
68 | | /// // Sort by a ASC |
69 | | /// let options = SortOptions::default(); |
70 | | /// let sort_expr = PhysicalSortExpr::new(col("a"), options); |
71 | | /// assert_eq!(sort_expr.to_string(), "a ASC"); |
72 | | /// |
73 | | /// // Sort by a DESC NULLS LAST |
74 | | /// let sort_expr = PhysicalSortExpr::new_default(col("a")) |
75 | | /// .desc() |
76 | | /// .nulls_last(); |
77 | | /// assert_eq!(sort_expr.to_string(), "a DESC NULLS LAST"); |
78 | | /// ``` |
79 | | #[derive(Clone, Debug)] |
80 | | pub struct PhysicalSortExpr { |
81 | | /// Physical expression representing the column to sort |
82 | | pub expr: Arc<dyn PhysicalExpr>, |
83 | | /// Option to specify how the given column should be sorted |
84 | | pub options: SortOptions, |
85 | | } |
86 | | |
87 | | impl PhysicalSortExpr { |
88 | | /// Create a new PhysicalSortExpr |
89 | 2.27k | pub fn new(expr: Arc<dyn PhysicalExpr>, options: SortOptions) -> Self { |
90 | 2.27k | Self { expr, options } |
91 | 2.27k | } |
92 | | |
93 | | /// Create a new PhysicalSortExpr with default [`SortOptions`] |
94 | 4 | pub fn new_default(expr: Arc<dyn PhysicalExpr>) -> Self { |
95 | 4 | Self::new(expr, SortOptions::default()) |
96 | 4 | } |
97 | | |
98 | | /// Set the sort options to ASC |
99 | 0 | pub fn asc(mut self) -> Self { |
100 | 0 | self.options.descending = false; |
101 | 0 | self |
102 | 0 | } |
103 | | |
104 | | /// Set the sort options to DESC |
105 | 0 | pub fn desc(mut self) -> Self { |
106 | 0 | self.options.descending = true; |
107 | 0 | self |
108 | 0 | } |
109 | | |
110 | | /// Set the sort options to NULLS FIRST |
111 | 0 | pub fn nulls_first(mut self) -> Self { |
112 | 0 | self.options.nulls_first = true; |
113 | 0 | self |
114 | 0 | } |
115 | | |
116 | | /// Set the sort options to NULLS LAST |
117 | 0 | pub fn nulls_last(mut self) -> Self { |
118 | 0 | self.options.nulls_first = false; |
119 | 0 | self |
120 | 0 | } |
121 | | } |
122 | | |
123 | | impl PartialEq for PhysicalSortExpr { |
124 | 272 | fn eq(&self, other: &PhysicalSortExpr) -> bool { |
125 | 272 | self.options == other.options && self.expr.eq(&other.expr) |
126 | 272 | } |
127 | | } |
128 | | |
129 | | impl Eq for PhysicalSortExpr {} |
130 | | |
131 | | impl Hash for PhysicalSortExpr { |
132 | 19 | fn hash<H: Hasher>(&self, state: &mut H) { |
133 | 19 | self.expr.hash(state); |
134 | 19 | self.options.hash(state); |
135 | 19 | } |
136 | | } |
137 | | |
138 | | impl Display for PhysicalSortExpr { |
139 | 5 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
140 | 5 | write!(f, "{} {}", self.expr, to_str(&self.options)) |
141 | 5 | } |
142 | | } |
143 | | |
144 | | impl PhysicalSortExpr { |
145 | | /// Evaluates the sort expression into a `SortColumn` that can be passed to the arrow sort kernel |
146 | 144 | pub fn evaluate_to_sort_column(&self, batch: &RecordBatch) -> Result<SortColumn> { |
147 | 144 | let value_to_sort = self.expr.evaluate(batch)?0 ; |
148 | 144 | let array_to_sort = match value_to_sort { |
149 | 143 | ColumnarValue::Array(array) => array, |
150 | 1 | ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(batch.num_rows())?0 , |
151 | | }; |
152 | 144 | Ok(SortColumn { |
153 | 144 | values: array_to_sort, |
154 | 144 | options: Some(self.options), |
155 | 144 | }) |
156 | 144 | } |
157 | | |
158 | | /// Checks whether this sort expression satisfies the given `requirement`. |
159 | | /// If sort options are unspecified in `requirement`, only expressions are |
160 | | /// compared for equality. |
161 | 88 | pub fn satisfy( |
162 | 88 | &self, |
163 | 88 | requirement: &PhysicalSortRequirement, |
164 | 88 | schema: &Schema, |
165 | 88 | ) -> bool { |
166 | 88 | // If the column is not nullable, NULLS FIRST/LAST is not important. |
167 | 88 | let nullable = self.expr.nullable(schema).unwrap_or(true); |
168 | 88 | self.expr.eq(&requirement.expr) |
169 | 88 | && if nullable { |
170 | 58 | requirement |
171 | 58 | .options |
172 | 58 | .map_or(true, |opts| self.options == opts) |
173 | | } else { |
174 | 30 | requirement |
175 | 30 | .options |
176 | 30 | .map_or(true, |opts| self.options.descending == opts.descending) |
177 | | } |
178 | 88 | } |
179 | | |
180 | | /// Returns a [`Display`]able list of `PhysicalSortExpr`. |
181 | 4 | pub fn format_list(input: &[PhysicalSortExpr]) -> impl Display + '_ { |
182 | | struct DisplayableList<'a>(&'a [PhysicalSortExpr]); |
183 | | impl<'a> Display for DisplayableList<'a> { |
184 | 4 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
185 | 4 | let mut first = true; |
186 | 8 | for sort_expr4 in self.0 { |
187 | 4 | if first { |
188 | 4 | first = false; |
189 | 4 | } else { |
190 | 0 | write!(f, ",")?; |
191 | | } |
192 | 4 | write!(f, "{}", sort_expr)?0 ; |
193 | | } |
194 | 4 | Ok(()) |
195 | 4 | } |
196 | | } |
197 | 4 | DisplayableList(input) |
198 | 4 | } |
199 | | } |
200 | | |
201 | | /// Represents sort requirement associated with a plan |
202 | | /// |
203 | | /// If the requirement includes [`SortOptions`] then both the |
204 | | /// expression *and* the sort options must match. |
205 | | /// |
206 | | /// If the requirement does not include [`SortOptions`] then only the |
207 | | /// expressions must match. |
208 | | /// |
209 | | /// # Examples |
210 | | /// |
211 | | /// With sort options (`A`, `DESC NULLS FIRST`): |
212 | | /// * `ORDER BY A DESC NULLS FIRST` matches |
213 | | /// * `ORDER BY A ASC NULLS FIRST` does not match (`ASC` vs `DESC`) |
214 | | /// * `ORDER BY B DESC NULLS FIRST` does not match (different expr) |
215 | | /// |
216 | | /// Without sort options (`A`, None): |
217 | | /// * `ORDER BY A DESC NULLS FIRST` matches |
218 | | /// * `ORDER BY A ASC NULLS FIRST` matches (`ASC` and `NULL` options ignored) |
219 | | /// * `ORDER BY B DESC NULLS FIRST` does not match (different expr) |
220 | | #[derive(Clone, Debug)] |
221 | | pub struct PhysicalSortRequirement { |
222 | | /// Physical expression representing the column to sort |
223 | | pub expr: Arc<dyn PhysicalExpr>, |
224 | | /// Option to specify how the given column should be sorted. |
225 | | /// If unspecified, there are no constraints on sort options. |
226 | | pub options: Option<SortOptions>, |
227 | | } |
228 | | |
229 | | impl From<PhysicalSortRequirement> for PhysicalSortExpr { |
230 | | /// If `options` is `None`, the default sort options `ASC, NULLS LAST` are used. |
231 | | /// |
232 | | /// The default is picked to be consistent with |
233 | | /// PostgreSQL: <https://www.postgresql.org/docs/current/queries-order.html> |
234 | 2.14k | fn from(value: PhysicalSortRequirement) -> Self { |
235 | 2.14k | let options = value.options.unwrap_or(SortOptions { |
236 | 2.14k | descending: false, |
237 | 2.14k | nulls_first: false, |
238 | 2.14k | }); |
239 | 2.14k | PhysicalSortExpr::new(value.expr, options) |
240 | 2.14k | } |
241 | | } |
242 | | |
243 | | impl From<PhysicalSortExpr> for PhysicalSortRequirement { |
244 | 3.48k | fn from(value: PhysicalSortExpr) -> Self { |
245 | 3.48k | PhysicalSortRequirement::new(value.expr, Some(value.options)) |
246 | 3.48k | } |
247 | | } |
248 | | |
249 | | impl PartialEq for PhysicalSortRequirement { |
250 | 9 | fn eq(&self, other: &PhysicalSortRequirement) -> bool { |
251 | 9 | self.options == other.options && self.expr.eq(&other.expr) |
252 | 9 | } |
253 | | } |
254 | | |
255 | | impl Display for PhysicalSortRequirement { |
256 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
257 | 0 | let opts_string = self.options.as_ref().map_or("NA", to_str); |
258 | 0 | write!(f, "{} {}", self.expr, opts_string) |
259 | 0 | } |
260 | | } |
261 | | |
262 | | /// Returns a [`Display`]able list of [`PhysicalSortRequirement`]s. |
263 | | /// |
264 | | /// Example output: `[a + 1 ASC, b NA]` |
265 | 0 | pub fn format_physical_sort_requirement_list( |
266 | 0 | exprs: &[PhysicalSortRequirement], |
267 | 0 | ) -> impl Display + '_ { |
268 | | struct DisplayWrapper<'a>(&'a [PhysicalSortRequirement]); |
269 | | impl<'a> Display for DisplayWrapper<'a> { |
270 | 0 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
271 | 0 | let mut iter = self.0.iter(); |
272 | 0 | write!(f, "[")?; |
273 | 0 | if let Some(expr) = iter.next() { |
274 | 0 | write!(f, "{}", expr)?; |
275 | 0 | } |
276 | 0 | for expr in iter { |
277 | 0 | write!(f, ", {}", expr)?; |
278 | | } |
279 | 0 | write!(f, "]")?; |
280 | 0 | Ok(()) |
281 | 0 | } |
282 | | } |
283 | 0 | DisplayWrapper(exprs) |
284 | 0 | } |
285 | | |
286 | | impl PhysicalSortRequirement { |
287 | | /// Creates a new requirement. |
288 | | /// |
289 | | /// If `options` is `Some(..)`, creates an `exact` requirement, |
290 | | /// which must match both `options` and `expr`. |
291 | | /// |
292 | | /// If `options` is `None`, creates an `expr_only` requirement, |
293 | | /// which must match only `expr`. |
294 | | /// |
295 | | /// See [`PhysicalSortRequirement`] for examples. |
296 | 3.49k | pub fn new(expr: Arc<dyn PhysicalExpr>, options: Option<SortOptions>) -> Self { |
297 | 3.49k | Self { expr, options } |
298 | 3.49k | } |
299 | | |
300 | | /// Replace the required expression for this requirement with the new one |
301 | 0 | pub fn with_expr(mut self, expr: Arc<dyn PhysicalExpr>) -> Self { |
302 | 0 | self.expr = expr; |
303 | 0 | self |
304 | 0 | } |
305 | | |
306 | | /// Returns whether this requirement is equal to or more specific than `other`. |
307 | 0 | pub fn compatible(&self, other: &PhysicalSortRequirement) -> bool { |
308 | 0 | self.expr.eq(&other.expr) |
309 | 0 | && other.options.map_or(true, |other_opts| { |
310 | 0 | self.options.map_or(false, |opts| opts == other_opts) |
311 | 0 | }) |
312 | 0 | } |
313 | | |
314 | | /// Returns a [`LexRequirement`] that requires the exact |
315 | | /// sort of the [`PhysicalSortExpr`]s in `ordering` |
316 | | /// |
317 | | /// This method takes `&'a PhysicalSortExpr` to make it easy to |
318 | | /// use when implementing [`ExecutionPlan::required_input_ordering`]. |
319 | | /// |
320 | | /// [`ExecutionPlan::required_input_ordering`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#method.required_input_ordering |
321 | 1.36k | pub fn from_sort_exprs<'a>( |
322 | 1.36k | ordering: impl IntoIterator<Item = &'a PhysicalSortExpr>, |
323 | 1.36k | ) -> LexRequirement { |
324 | 1.36k | LexRequirement::new( |
325 | 1.36k | ordering |
326 | 1.36k | .into_iter() |
327 | 1.36k | .cloned() |
328 | 1.36k | .map(PhysicalSortRequirement::from) |
329 | 1.36k | .collect(), |
330 | 1.36k | ) |
331 | 1.36k | } |
332 | | |
333 | | /// Converts an iterator of [`PhysicalSortRequirement`] into a Vec |
334 | | /// of [`PhysicalSortExpr`]s. |
335 | | /// |
336 | | /// This function converts `PhysicalSortRequirement` to `PhysicalSortExpr` |
337 | | /// for each entry in the input. If the required ordering is `None` for an |
338 | | /// entry, the default ordering `ASC, NULLS LAST` is used (see `PhysicalSortExpr::from`). |
339 | 1.00k | pub fn to_sort_exprs( |
340 | 1.00k | requirements: impl IntoIterator<Item = PhysicalSortRequirement>, |
341 | 1.00k | ) -> Vec<PhysicalSortExpr> { |
342 | 1.00k | requirements |
343 | 1.00k | .into_iter() |
344 | 1.00k | .map(PhysicalSortExpr::from) |
345 | 1.00k | .collect() |
346 | 1.00k | } |
347 | | } |
348 | | |
349 | | /// Returns the SQL string representation of the given [SortOptions] object. |
350 | | #[inline] |
351 | 5 | fn to_str(options: &SortOptions) -> &str { |
352 | 5 | match (options.descending, options.nulls_first) { |
353 | 0 | (true, true) => "DESC", |
354 | 0 | (true, false) => "DESC NULLS LAST", |
355 | 4 | (false, true) => "ASC", |
356 | 1 | (false, false) => "ASC NULLS LAST", |
357 | | } |
358 | 5 | } |
359 | | |
360 | | /// `LexOrdering` is an alias for the type `Vec<PhysicalSortExpr>`, which represents |
361 | | /// a lexicographical ordering. |
362 | | pub type LexOrdering = Vec<PhysicalSortExpr>; |
363 | | |
364 | | /// `LexOrderingRef` is an alias for the type `&[PhysicalSortExpr]`, which represents |
365 | | /// a reference to a lexicographical ordering. |
366 | | pub type LexOrderingRef<'a> = &'a [PhysicalSortExpr]; |
367 | | |
368 | | /// `LexRequirement` is a struct containing a `Vec<PhysicalSortRequirement>`, which |
369 | | /// represents a lexicographical ordering requirement. |
370 | | #[derive(Debug, Default, Clone, PartialEq)] |
371 | | pub struct LexRequirement { |
372 | | pub inner: Vec<PhysicalSortRequirement>, |
373 | | } |
374 | | |
375 | | impl LexRequirement { |
376 | 6.74k | pub fn new(inner: Vec<PhysicalSortRequirement>) -> Self { |
377 | 6.74k | Self { inner } |
378 | 6.74k | } |
379 | | |
380 | 1.26k | pub fn iter(&self) -> impl Iterator<Item = &PhysicalSortRequirement> { |
381 | 1.26k | self.inner.iter() |
382 | 1.26k | } |
383 | | |
384 | 9 | pub fn push(&mut self, physical_sort_requirement: PhysicalSortRequirement) { |
385 | 9 | self.inner.push(physical_sort_requirement) |
386 | 9 | } |
387 | | } |
388 | | |
389 | | impl Deref for LexRequirement { |
390 | | type Target = [PhysicalSortRequirement]; |
391 | | |
392 | 1.36k | fn deref(&self) -> &Self::Target { |
393 | 1.36k | self.inner.as_slice() |
394 | 1.36k | } |
395 | | } |
396 | | |
397 | | impl FromIterator<PhysicalSortRequirement> for LexRequirement { |
398 | 1.25k | fn from_iter<T: IntoIterator<Item = PhysicalSortRequirement>>(iter: T) -> Self { |
399 | 1.25k | let mut lex_requirement = LexRequirement::new(vec![]); |
400 | | |
401 | 3.58k | for i2.33k in iter { |
402 | 2.33k | lex_requirement.inner.push(i); |
403 | 2.33k | } |
404 | | |
405 | 1.25k | lex_requirement |
406 | 1.25k | } |
407 | | } |
408 | | |
409 | | impl IntoIterator for LexRequirement { |
410 | | type Item = PhysicalSortRequirement; |
411 | | type IntoIter = std::vec::IntoIter<Self::Item>; |
412 | | |
413 | 3.89k | fn into_iter(self) -> Self::IntoIter { |
414 | 3.89k | self.inner.into_iter() |
415 | 3.89k | } |
416 | | } |
417 | | |
418 | | /// `LexRequirementRef` is an alias for the type `&[PhysicalSortRequirement]`, which |
419 | | /// represents a reference to a lexicographical ordering requirement. |
420 | | pub type LexRequirementRef<'a> = &'a [PhysicalSortRequirement]; |