Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/expr/src/window_frame.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Window frame module
19
//!
20
//! The frame-spec determines which output rows are read by an aggregate window function. The frame-spec consists of four parts:
21
//! - A frame type - either ROWS, RANGE or GROUPS,
22
//! - A starting frame boundary,
23
//! - An ending frame boundary,
24
//! - An EXCLUDE clause.
25
26
use std::fmt::{self, Formatter};
27
use std::hash::Hash;
28
29
use crate::{expr::Sort, lit};
30
31
use datafusion_common::{plan_err, sql_err, DataFusionError, Result, ScalarValue};
32
use sqlparser::ast;
33
use sqlparser::parser::ParserError::ParserError;
34
35
/// The frame specification determines which output rows are read by an aggregate
36
/// window function. The ending frame boundary can be omitted if the `BETWEEN`
37
/// and `AND` keywords that surround the starting frame boundary are also omitted,
38
/// in which case the ending frame boundary defaults to `CURRENT ROW`.
///
/// The private `causal` flag is set by the constructors ([`WindowFrame::new`]
/// and [`WindowFrame::new_bounds`]) and is read through
/// [`WindowFrame::is_causal`].
39
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash)]
40
pub struct WindowFrame {
41
    /// Frame type - either `ROWS`, `RANGE` or `GROUPS`
42
    pub units: WindowFrameUnits,
43
    /// Starting frame boundary
44
    pub start_bound: WindowFrameBound,
45
    /// Ending frame boundary
46
    pub end_bound: WindowFrameBound,
47
    /// Flag indicating whether the frame is causal (i.e. computing the result
48
    /// for the current row doesn't depend on any subsequent rows).
49
    ///
50
    /// Example causal window frames:
51
    /// ```text
52
    ///                +--------------+
53
    ///      Future    |              |
54
    ///         |      |              |
55
    ///         |      |              |
56
    ///    Current Row |+------------+|  ---
57
    ///         |      |              |   |
58
    ///         |      |              |   |
59
    ///         |      |              |   |  Window Frame 1
60
    ///       Past     |              |   |
61
    ///                |              |   |
62
    ///                |              |  ---
63
    ///                +--------------+
64
    ///
65
    ///                +--------------+
66
    ///      Future    |              |
67
    ///         |      |              |
68
    ///         |      |              |
69
    ///    Current Row |+------------+|
70
    ///         |      |              |
71
    ///         |      |              | ---
72
    ///         |      |              |  |
73
    ///       Past     |              |  |  Window Frame 2
74
    ///                |              |  |
75
    ///                |              | ---
76
    ///                +--------------+
77
    /// ```
78
    /// Example non-causal window frame:
79
    /// ```text
80
    ///                +--------------+
81
    ///      Future    |              |
82
    ///         |      |              |
83
    ///         |      |              | ---
84
    ///    Current Row |+------------+|  |
85
    ///         |      |              |  |  Window Frame 3
86
    ///         |      |              |  |
87
    ///         |      |              | ---
88
    ///       Past     |              |
89
    ///                |              |
90
    ///                |              |
91
    ///                +--------------+
92
    /// ```
93
    causal: bool,
94
}
95
96
impl fmt::Display for WindowFrame {
97
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
98
0
        write!(
99
0
            f,
100
0
            "{} BETWEEN {} AND {}",
101
0
            self.units, self.start_bound, self.end_bound
102
0
        )?;
103
0
        Ok(())
104
0
    }
105
}
106
107
impl fmt::Debug for WindowFrame {
108
4
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
109
4
        write!(
110
4
            f,
111
4
            "WindowFrame {{ units: {:?}, start_bound: {:?}, end_bound: {:?}, is_causal: {:?} }}",
112
4
            self.units, self.start_bound, self.end_bound, self.causal
113
4
        )
?0
;
114
4
        Ok(())
115
4
    }
116
}
117
118
impl TryFrom<ast::WindowFrame> for WindowFrame {
119
    type Error = DataFusionError;
120
121
0
    fn try_from(value: ast::WindowFrame) -> Result<Self> {
122
0
        let start_bound = value.start_bound.try_into()?;
123
0
        let end_bound = match value.end_bound {
124
0
            Some(value) => value.try_into()?,
125
0
            None => WindowFrameBound::CurrentRow,
126
        };
127
128
0
        if let WindowFrameBound::Following(val) = &start_bound {
129
0
            if val.is_null() {
130
0
                plan_err!(
131
0
                    "Invalid window frame: start bound cannot be UNBOUNDED FOLLOWING"
132
0
                )?
133
0
            }
134
0
        } else if let WindowFrameBound::Preceding(val) = &end_bound {
135
0
            if val.is_null() {
136
0
                plan_err!(
137
0
                    "Invalid window frame: end bound cannot be UNBOUNDED PRECEDING"
138
0
                )?
139
0
            }
140
0
        };
141
0
        let units = value.units.into();
142
0
        Ok(Self::new_bounds(units, start_bound, end_bound))
143
0
    }
144
}
145
146
impl WindowFrame {
147
    /// Creates a new, default window frame (with the meaning of default
148
    /// depending on whether the frame contains an `ORDER BY` clause and this
149
    /// ordering is strict (i.e. no ties).
150
1
    pub fn new(order_by: Option<bool>) -> Self {
151
1
        if let Some(
strict0
) = order_by {
152
            // This window frame covers the table (or partition if `PARTITION BY`
153
            // is used) from beginning to the `CURRENT ROW` (with same rank). It
154
            // is used when the `OVER` clause contains an `ORDER BY` clause but
155
            // no frame.
156
            Self {
157
0
                units: if strict {
158
0
                    WindowFrameUnits::Rows
159
                } else {
160
0
                    WindowFrameUnits::Range
161
                },
162
0
                start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
163
0
                end_bound: WindowFrameBound::CurrentRow,
164
0
                causal: strict,
165
            }
166
        } else {
167
            // This window frame covers the whole table (or partition if `PARTITION BY`
168
            // is used). It is used when the `OVER` clause does not contain an
169
            // `ORDER BY` clause and there is no frame.
170
1
            Self {
171
1
                units: WindowFrameUnits::Rows,
172
1
                start_bound: WindowFrameBound::Preceding(ScalarValue::UInt64(None)),
173
1
                end_bound: WindowFrameBound::Following(ScalarValue::UInt64(None)),
174
1
                causal: false,
175
1
            }
176
        }
177
1
    }
178
179
    /// Get reversed window frame. For example
180
    /// `3 ROWS PRECEDING AND 2 ROWS FOLLOWING` -->
181
    /// `2 ROWS PRECEDING AND 3 ROWS FOLLOWING`
182
0
    pub fn reverse(&self) -> Self {
183
0
        let start_bound = match &self.end_bound {
184
0
            WindowFrameBound::Preceding(value) => {
185
0
                WindowFrameBound::Following(value.clone())
186
            }
187
0
            WindowFrameBound::Following(value) => {
188
0
                WindowFrameBound::Preceding(value.clone())
189
            }
190
0
            WindowFrameBound::CurrentRow => WindowFrameBound::CurrentRow,
191
        };
192
0
        let end_bound = match &self.start_bound {
193
0
            WindowFrameBound::Preceding(value) => {
194
0
                WindowFrameBound::Following(value.clone())
195
            }
196
0
            WindowFrameBound::Following(value) => {
197
0
                WindowFrameBound::Preceding(value.clone())
198
            }
199
0
            WindowFrameBound::CurrentRow => WindowFrameBound::CurrentRow,
200
        };
201
0
        Self::new_bounds(self.units, start_bound, end_bound)
202
0
    }
203
204
    /// Get whether window frame is causal
205
21
    pub fn is_causal(&self) -> bool {
206
21
        self.causal
207
21
    }
208
209
    /// Initializes window frame from units (type), start bound and end bound.
210
4
    pub fn new_bounds(
211
4
        units: WindowFrameUnits,
212
4
        start_bound: WindowFrameBound,
213
4
        end_bound: WindowFrameBound,
214
4
    ) -> Self {
215
4
        let causal = match units {
216
3
            WindowFrameUnits::Rows => match &end_bound {
217
0
                WindowFrameBound::Following(value) => {
218
0
                    if value.is_null() {
219
                        // Unbounded following
220
0
                        false
221
                    } else {
222
0
                        let zero = ScalarValue::new_zero(&value.data_type());
223
0
                        zero.map(|zero| value.eq(&zero)).unwrap_or(false)
224
                    }
225
                }
226
3
                _ => true,
227
            },
228
1
            WindowFrameUnits::Range | WindowFrameUnits::Groups => match &end_bound {
229
0
                WindowFrameBound::Preceding(value) => {
230
0
                    if value.is_null() {
231
                        // Unbounded preceding
232
0
                        true
233
                    } else {
234
0
                        let zero = ScalarValue::new_zero(&value.data_type());
235
0
                        zero.map(|zero| value.gt(&zero)).unwrap_or(false)
236
                    }
237
                }
238
1
                _ => false,
239
            },
240
        };
241
4
        Self {
242
4
            units,
243
4
            start_bound,
244
4
            end_bound,
245
4
            causal,
246
4
        }
247
4
    }
248
249
    /// Regularizes the ORDER BY clause of the window frame.
250
0
    pub fn regularize_order_bys(&self, order_by: &mut Vec<Sort>) -> Result<()> {
251
0
        match self.units {
252
            // Normally, RANGE frames require an ORDER BY clause with exactly
253
            // one column. However, an ORDER BY clause may be absent or have
254
            // more than one column when the start/end bounds are UNBOUNDED or
255
            // CURRENT ROW.
256
0
            WindowFrameUnits::Range if self.free_range() => {
257
0
                // If an ORDER BY clause is absent, it is equivalent to an
258
0
                // ORDER BY clause with constant value as sort key. If an
259
0
                // ORDER BY clause is present but has more than one column,
260
0
                // it is unchanged. Note that this follows PostgreSQL behavior.
261
0
                if order_by.is_empty() {
262
0
                    order_by.push(lit(1u64).sort(true, false));
263
0
                }
264
            }
265
0
            WindowFrameUnits::Range if order_by.len() != 1 => {
266
0
                return plan_err!("RANGE requires exactly one ORDER BY column");
267
            }
268
0
            WindowFrameUnits::Groups if order_by.is_empty() => {
269
0
                return plan_err!("GROUPS requires an ORDER BY clause");
270
            }
271
0
            _ => {}
272
        }
273
0
        Ok(())
274
0
    }
275
276
    /// Returns whether the window frame can accept multiple ORDER BY expressons.
277
0
    pub fn can_accept_multi_orderby(&self) -> bool {
278
0
        match self.units {
279
0
            WindowFrameUnits::Rows => true,
280
0
            WindowFrameUnits::Range => self.free_range(),
281
0
            WindowFrameUnits::Groups => true,
282
        }
283
0
    }
284
285
    /// Returns whether the window frame is "free range"; i.e. its start/end
286
    /// bounds are UNBOUNDED or CURRENT ROW.
287
0
    fn free_range(&self) -> bool {
288
0
        (self.start_bound.is_unbounded()
289
0
            || self.start_bound == WindowFrameBound::CurrentRow)
290
0
            && (self.end_bound.is_unbounded()
291
0
                || self.end_bound == WindowFrameBound::CurrentRow)
292
0
    }
293
}
294
295
/// There are five ways to describe starting and ending frame boundaries:
296
///
297
/// 1. UNBOUNDED PRECEDING
298
/// 2. `<expr>` PRECEDING
299
/// 3. CURRENT ROW
300
/// 4. `<expr>` FOLLOWING
301
/// 5. UNBOUNDED FOLLOWING
302
///
/// The five forms map onto three variants: the UNBOUNDED forms are stored as
/// `Preceding`/`Following` holding a null `ScalarValue` (see
/// [`WindowFrameBound::is_unbounded`]).
303
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
304
pub enum WindowFrameBound {
305
    /// 1. UNBOUNDED PRECEDING
306
    ///    The frame boundary is the first row in the partition.
    ///    Represented by a null offset value.
307
    ///
308
    /// 2. `<expr>` PRECEDING
309
    ///    `<expr>` must be a non-negative constant numeric expression. The boundary is a row that
310
    ///    is `<expr>` "units" prior to the current row.
311
    Preceding(ScalarValue),
312
    /// 3. The current row.
313
    ///
314
    /// For RANGE and GROUPS frame types, peers of the current row are also
315
    /// included in the frame, unless specifically excluded by the EXCLUDE clause.
316
    /// This is true regardless of whether CURRENT ROW is used as the starting or ending frame
317
    /// boundary.
318
    CurrentRow,
319
    /// 4. This is the same as "`<expr>` PRECEDING" except that the boundary is `<expr>` units after the
320
    ///    current rather than before the current row.
321
    ///
322
    /// 5. UNBOUNDED FOLLOWING
323
    ///    The frame boundary is the last row in the partition.
    ///    Represented by a null offset value.
324
    Following(ScalarValue),
325
}
326
327
impl WindowFrameBound {
328
14
    pub fn is_unbounded(&self) -> bool {
329
14
        match self {
330
13
            WindowFrameBound::Preceding(elem) => elem.is_null(),
331
1
            WindowFrameBound::CurrentRow => false,
332
0
            WindowFrameBound::Following(elem) => elem.is_null(),
333
        }
334
14
    }
335
}
336
337
impl TryFrom<ast::WindowFrameBound> for WindowFrameBound {
338
    type Error = DataFusionError;
339
340
0
    fn try_from(value: ast::WindowFrameBound) -> Result<Self> {
341
0
        Ok(match value {
342
0
            ast::WindowFrameBound::Preceding(Some(v)) => {
343
0
                Self::Preceding(convert_frame_bound_to_scalar_value(*v)?)
344
            }
345
0
            ast::WindowFrameBound::Preceding(None) => Self::Preceding(ScalarValue::Null),
346
0
            ast::WindowFrameBound::Following(Some(v)) => {
347
0
                Self::Following(convert_frame_bound_to_scalar_value(*v)?)
348
            }
349
0
            ast::WindowFrameBound::Following(None) => Self::Following(ScalarValue::Null),
350
0
            ast::WindowFrameBound::CurrentRow => Self::CurrentRow,
351
        })
352
0
    }
353
}
354
355
0
pub fn convert_frame_bound_to_scalar_value(v: ast::Expr) -> Result<ScalarValue> {
356
0
    Ok(ScalarValue::Utf8(Some(match v {
357
0
        ast::Expr::Value(ast::Value::Number(value, false))
358
0
        | ast::Expr::Value(ast::Value::SingleQuotedString(value)) => value,
359
        ast::Expr::Interval(ast::Interval {
360
0
            value,
361
0
            leading_field,
362
            ..
363
        }) => {
364
0
            let result = match *value {
365
0
                ast::Expr::Value(ast::Value::SingleQuotedString(item)) => item,
366
0
                e => {
367
0
                    return sql_err!(ParserError(format!(
368
0
                        "INTERVAL expression cannot be {e:?}"
369
0
                    )));
370
                }
371
            };
372
0
            if let Some(leading_field) = leading_field {
373
0
                format!("{result} {leading_field}")
374
            } else {
375
0
                result
376
            }
377
        }
378
0
        _ => plan_err!(
379
0
            "Invalid window frame: frame offsets must be non negative integers"
380
0
        )?,
381
    })))
382
0
}
383
384
impl fmt::Display for WindowFrameBound {
385
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
386
0
        match self {
387
0
            WindowFrameBound::Preceding(n) => {
388
0
                if n.is_null() {
389
0
                    f.write_str("UNBOUNDED PRECEDING")
390
                } else {
391
0
                    write!(f, "{n} PRECEDING")
392
                }
393
            }
394
0
            WindowFrameBound::CurrentRow => f.write_str("CURRENT ROW"),
395
0
            WindowFrameBound::Following(n) => {
396
0
                if n.is_null() {
397
0
                    f.write_str("UNBOUNDED FOLLOWING")
398
                } else {
399
0
                    write!(f, "{n} FOLLOWING")
400
                }
401
            }
402
        }
403
0
    }
404
}
405
406
/// There are three frame types: ROWS, GROUPS, and RANGE. The frame type determines how the
407
/// starting and ending boundaries of the frame are measured.
408
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Hash)]
409
pub enum WindowFrameUnits {
410
    /// The ROWS frame type means that the starting and ending boundaries for the frame are
411
    /// determined by counting individual rows relative to the current row.
412
    Rows,
413
    /// The RANGE frame type requires that the ORDER BY clause of the window have exactly one
414
    /// term. Call that term "X". With the RANGE frame type, the elements of the frame are
415
    /// determined by computing the value of expression X for all rows in the partition and framing
416
    /// those rows for which the value of X is within a certain range of the value of X for the
417
    /// current row.
418
    Range,
419
    /// The GROUPS frame type means that the starting and ending boundaries are determined
420
    /// by counting "groups" relative to the current group. A "group" is a set of rows that all have
421
    /// equivalent values for all terms of the window ORDER BY clause.
422
    Groups,
423
}
424
425
impl fmt::Display for WindowFrameUnits {
426
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
427
0
        f.write_str(match self {
428
0
            WindowFrameUnits::Rows => "ROWS",
429
0
            WindowFrameUnits::Range => "RANGE",
430
0
            WindowFrameUnits::Groups => "GROUPS",
431
        })
432
0
    }
433
}
434
435
impl From<ast::WindowFrameUnits> for WindowFrameUnits {
436
0
    fn from(value: ast::WindowFrameUnits) -> Self {
437
0
        match value {
438
0
            ast::WindowFrameUnits::Range => Self::Range,
439
0
            ast::WindowFrameUnits::Groups => Self::Groups,
440
0
            ast::WindowFrameUnits::Rows => Self::Rows,
441
        }
442
0
    }
443
}
444
445
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a boxed numeric-literal offset expression for a frame bound.
    fn number_offset(digits: &str) -> Option<Box<ast::Expr>> {
        Some(Box::new(ast::Expr::Value(ast::Value::Number(
            digits.to_string(),
            false,
        ))))
    }

    #[test]
    fn test_window_frame_creation() -> Result<()> {
        // UNBOUNDED FOLLOWING is rejected as a start bound.
        let unbounded_following_start = ast::WindowFrame {
            units: ast::WindowFrameUnits::Range,
            start_bound: ast::WindowFrameBound::Following(None),
            end_bound: None,
        };
        let err = WindowFrame::try_from(unbounded_following_start).unwrap_err();
        assert_eq!(
            err.strip_backtrace(),
            "Error during planning: Invalid window frame: start bound cannot be UNBOUNDED FOLLOWING".to_owned()
        );

        // UNBOUNDED PRECEDING is rejected as an end bound.
        let unbounded_preceding_end = ast::WindowFrame {
            units: ast::WindowFrameUnits::Range,
            start_bound: ast::WindowFrameBound::Preceding(None),
            end_bound: Some(ast::WindowFrameBound::Preceding(None)),
        };
        let err = WindowFrame::try_from(unbounded_preceding_end).unwrap_err();
        assert_eq!(
            err.strip_backtrace(),
            "Error during planning: Invalid window frame: end bound cannot be UNBOUNDED PRECEDING".to_owned()
        );

        // `ROWS BETWEEN 2 PRECEDING AND 1 PRECEDING` converts successfully.
        let bounded_rows = ast::WindowFrame {
            units: ast::WindowFrameUnits::Rows,
            start_bound: ast::WindowFrameBound::Preceding(number_offset("2")),
            end_bound: Some(ast::WindowFrameBound::Preceding(number_offset("1"))),
        };
        assert!(WindowFrame::try_from(bounded_rows).is_ok());
        Ok(())
    }
}