/Users/andrewlamb/Software/datafusion/datafusion/expr/src/window_frame.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Window frame module |
19 | | //! |
20 | | //! The frame-spec determines which output rows are read by an aggregate window function. The frame-spec consists of four parts: |
21 | | //! - A frame type - either ROWS, RANGE or GROUPS, |
22 | | //! - A starting frame boundary, |
23 | | //! - An ending frame boundary, |
24 | | //! - An EXCLUDE clause. |
25 | | |
26 | | use std::fmt::{self, Formatter}; |
27 | | use std::hash::Hash; |
28 | | |
29 | | use crate::{expr::Sort, lit}; |
30 | | |
31 | | use datafusion_common::{plan_err, sql_err, DataFusionError, Result, ScalarValue}; |
32 | | use sqlparser::ast; |
33 | | use sqlparser::parser::ParserError::ParserError; |
34 | | |
35 | | /// The frame specification determines which output rows are read by an aggregate |
36 | | /// window function. The ending frame boundary can be omitted if the `BETWEEN` |
37 | | /// and `AND` keywords that surround the starting frame boundary are also omitted, |
38 | | /// in which case the ending frame boundary defaults to `CURRENT ROW`. |
39 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash)] |
40 | | pub struct WindowFrame { |
41 | | /// Frame type - either `ROWS`, `RANGE` or `GROUPS` |
42 | | pub units: WindowFrameUnits, |
43 | | /// Starting frame boundary |
44 | | pub start_bound: WindowFrameBound, |
45 | | /// Ending frame boundary |
46 | | pub end_bound: WindowFrameBound, |
47 | | /// Flag indicating whether the frame is causal (i.e. computing the result |
48 | | /// for the current row doesn't depend on any subsequent rows). It is set by the constructors (`new`, `new_bounds`) and read through `is_causal`. |
49 | | /// |
50 | | /// Example causal window frames: |
51 | | /// ```text |
52 | | /// +--------------+ |
53 | | /// Future | | |
54 | | /// | | | |
55 | | /// | | | |
56 | | /// Current Row |+------------+| --- |
57 | | /// | | | | |
58 | | /// | | | | |
59 | | /// | | | | Window Frame 1 |
60 | | /// Past | | | |
61 | | /// | | | |
62 | | /// | | --- |
63 | | /// +--------------+ |
64 | | /// |
65 | | /// +--------------+ |
66 | | /// Future | | |
67 | | /// | | | |
68 | | /// | | | |
69 | | /// Current Row |+------------+| |
70 | | /// | | | |
71 | | /// | | | --- |
72 | | /// | | | | |
73 | | /// Past | | | Window Frame 2 |
74 | | /// | | | |
75 | | /// | | --- |
76 | | /// +--------------+ |
77 | | /// ``` |
78 | | /// Example non-causal window frame: |
79 | | /// ```text |
80 | | /// +--------------+ |
81 | | /// Future | | |
82 | | /// | | | |
83 | | /// | | | --- |
84 | | /// Current Row |+------------+| | |
85 | | /// | | | | Window Frame 3 |
86 | | /// | | | | |
87 | | /// | | | --- |
88 | | /// Past | | |
89 | | /// | | |
90 | | /// | | |
91 | | /// +--------------+ |
92 | | /// ``` |
93 | | causal: bool, |
94 | | } |
95 | | |
96 | | impl fmt::Display for WindowFrame { |
97 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
98 | 0 | write!( |
99 | 0 | f, |
100 | 0 | "{} BETWEEN {} AND {}", |
101 | 0 | self.units, self.start_bound, self.end_bound |
102 | 0 | )?; |
103 | 0 | Ok(()) |
104 | 0 | } |
105 | | } |
106 | | |
107 | | impl fmt::Debug for WindowFrame { |
108 | 4 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { |
109 | 4 | write!( |
110 | 4 | f, |
111 | 4 | "WindowFrame {{ units: {:?}, start_bound: {:?}, end_bound: {:?}, is_causal: {:?} }}", |
112 | 4 | self.units, self.start_bound, self.end_bound, self.causal |
113 | 4 | )?0 ; |
114 | 4 | Ok(()) |
115 | 4 | } |
116 | | } |
117 | | |
118 | | impl TryFrom<ast::WindowFrame> for WindowFrame { |
119 | | type Error = DataFusionError; |
120 | | |
121 | 0 | fn try_from(value: ast::WindowFrame) -> Result<Self> { |
122 | 0 | let start_bound = value.start_bound.try_into()?; |
123 | 0 | let end_bound = match value.end_bound { |
124 | 0 | Some(value) => value.try_into()?, |
125 | 0 | None => WindowFrameBound::CurrentRow, |
126 | | }; |
127 | | |
128 | 0 | if let WindowFrameBound::Following(val) = &start_bound { |
129 | 0 | if val.is_null() { |
130 | 0 | plan_err!( |
131 | 0 | "Invalid window frame: start bound cannot be UNBOUNDED FOLLOWING" |
132 | 0 | )? |
133 | 0 | } |
134 | 0 | } else if let WindowFrameBound::Preceding(val) = &end_bound { |
135 | 0 | if val.is_null() { |
136 | 0 | plan_err!( |
137 | 0 | "Invalid window frame: end bound cannot be UNBOUNDED PRECEDING" |
138 | 0 | )? |
139 | 0 | } |
140 | 0 | }; |
141 | 0 | let units = value.units.into(); |
142 | 0 | Ok(Self::new_bounds(units, start_bound, end_bound)) |
143 | 0 | } |
144 | | } |
145 | | |
146 | | impl WindowFrame { |
147 | | /// Creates a new, default window frame (with the meaning of default |
148 | | /// depending on whether the frame contains an `ORDER BY` clause and this |
149 | | /// ordering is strict (i.e. no ties). |
150 | 1 | pub fn new(order_by: Option<bool>) -> Self { |
151 | 1 | if let Some(strict0 ) = order_by { |
152 | | // This window frame covers the table (or partition if `PARTITION BY` |
153 | | // is used) from beginning to the `CURRENT ROW` (with same rank). It |
154 | | // is used when the `OVER` clause contains an `ORDER BY` clause but |
155 | | // no frame. |
156 | | Self { |
157 | 0 | units: if strict { |
158 | 0 | WindowFrameUnits::Rows |
159 | | } else { |
160 | 0 | WindowFrameUnits::Range |
161 | | }, |
162 | 0 | start_bound: WindowFrameBound::Preceding(ScalarValue::Null), |
163 | 0 | end_bound: WindowFrameBound::CurrentRow, |
164 | 0 | causal: strict, |
165 | | } |
166 | | } else { |
167 | | // This window frame covers the whole table (or partition if `PARTITION BY` |
168 | | // is used). It is used when the `OVER` clause does not contain an |
169 | | // `ORDER BY` clause and there is no frame. |
170 | 1 | Self { |
171 | 1 | units: WindowFrameUnits::Rows, |
172 | 1 | start_bound: WindowFrameBound::Preceding(ScalarValue::UInt64(None)), |
173 | 1 | end_bound: WindowFrameBound::Following(ScalarValue::UInt64(None)), |
174 | 1 | causal: false, |
175 | 1 | } |
176 | | } |
177 | 1 | } |
178 | | |
179 | | /// Get reversed window frame. For example |
180 | | /// `3 ROWS PRECEDING AND 2 ROWS FOLLOWING` --> |
181 | | /// `2 ROWS PRECEDING AND 3 ROWS FOLLOWING` |
182 | 0 | pub fn reverse(&self) -> Self { |
183 | 0 | let start_bound = match &self.end_bound { |
184 | 0 | WindowFrameBound::Preceding(value) => { |
185 | 0 | WindowFrameBound::Following(value.clone()) |
186 | | } |
187 | 0 | WindowFrameBound::Following(value) => { |
188 | 0 | WindowFrameBound::Preceding(value.clone()) |
189 | | } |
190 | 0 | WindowFrameBound::CurrentRow => WindowFrameBound::CurrentRow, |
191 | | }; |
192 | 0 | let end_bound = match &self.start_bound { |
193 | 0 | WindowFrameBound::Preceding(value) => { |
194 | 0 | WindowFrameBound::Following(value.clone()) |
195 | | } |
196 | 0 | WindowFrameBound::Following(value) => { |
197 | 0 | WindowFrameBound::Preceding(value.clone()) |
198 | | } |
199 | 0 | WindowFrameBound::CurrentRow => WindowFrameBound::CurrentRow, |
200 | | }; |
201 | 0 | Self::new_bounds(self.units, start_bound, end_bound) |
202 | 0 | } |
203 | | |
204 | | /// Get whether window frame is causal |
205 | 21 | pub fn is_causal(&self) -> bool { |
206 | 21 | self.causal |
207 | 21 | } |
208 | | |
209 | | /// Initializes window frame from units (type), start bound and end bound. |
210 | 4 | pub fn new_bounds( |
211 | 4 | units: WindowFrameUnits, |
212 | 4 | start_bound: WindowFrameBound, |
213 | 4 | end_bound: WindowFrameBound, |
214 | 4 | ) -> Self { |
215 | 4 | let causal = match units { |
216 | 3 | WindowFrameUnits::Rows => match &end_bound { |
217 | 0 | WindowFrameBound::Following(value) => { |
218 | 0 | if value.is_null() { |
219 | | // Unbounded following |
220 | 0 | false |
221 | | } else { |
222 | 0 | let zero = ScalarValue::new_zero(&value.data_type()); |
223 | 0 | zero.map(|zero| value.eq(&zero)).unwrap_or(false) |
224 | | } |
225 | | } |
226 | 3 | _ => true, |
227 | | }, |
228 | 1 | WindowFrameUnits::Range | WindowFrameUnits::Groups => match &end_bound { |
229 | 0 | WindowFrameBound::Preceding(value) => { |
230 | 0 | if value.is_null() { |
231 | | // Unbounded preceding |
232 | 0 | true |
233 | | } else { |
234 | 0 | let zero = ScalarValue::new_zero(&value.data_type()); |
235 | 0 | zero.map(|zero| value.gt(&zero)).unwrap_or(false) |
236 | | } |
237 | | } |
238 | 1 | _ => false, |
239 | | }, |
240 | | }; |
241 | 4 | Self { |
242 | 4 | units, |
243 | 4 | start_bound, |
244 | 4 | end_bound, |
245 | 4 | causal, |
246 | 4 | } |
247 | 4 | } |
248 | | |
249 | | /// Regularizes the ORDER BY clause of the window frame. |
250 | 0 | pub fn regularize_order_bys(&self, order_by: &mut Vec<Sort>) -> Result<()> { |
251 | 0 | match self.units { |
252 | | // Normally, RANGE frames require an ORDER BY clause with exactly |
253 | | // one column. However, an ORDER BY clause may be absent or have |
254 | | // more than one column when the start/end bounds are UNBOUNDED or |
255 | | // CURRENT ROW. |
256 | 0 | WindowFrameUnits::Range if self.free_range() => { |
257 | 0 | // If an ORDER BY clause is absent, it is equivalent to an |
258 | 0 | // ORDER BY clause with constant value as sort key. If an |
259 | 0 | // ORDER BY clause is present but has more than one column, |
260 | 0 | // it is unchanged. Note that this follows PostgreSQL behavior. |
261 | 0 | if order_by.is_empty() { |
262 | 0 | order_by.push(lit(1u64).sort(true, false)); |
263 | 0 | } |
264 | | } |
265 | 0 | WindowFrameUnits::Range if order_by.len() != 1 => { |
266 | 0 | return plan_err!("RANGE requires exactly one ORDER BY column"); |
267 | | } |
268 | 0 | WindowFrameUnits::Groups if order_by.is_empty() => { |
269 | 0 | return plan_err!("GROUPS requires an ORDER BY clause"); |
270 | | } |
271 | 0 | _ => {} |
272 | | } |
273 | 0 | Ok(()) |
274 | 0 | } |
275 | | |
276 | | /// Returns whether the window frame can accept multiple ORDER BY expressons. |
277 | 0 | pub fn can_accept_multi_orderby(&self) -> bool { |
278 | 0 | match self.units { |
279 | 0 | WindowFrameUnits::Rows => true, |
280 | 0 | WindowFrameUnits::Range => self.free_range(), |
281 | 0 | WindowFrameUnits::Groups => true, |
282 | | } |
283 | 0 | } |
284 | | |
285 | | /// Returns whether the window frame is "free range"; i.e. its start/end |
286 | | /// bounds are UNBOUNDED or CURRENT ROW. |
287 | 0 | fn free_range(&self) -> bool { |
288 | 0 | (self.start_bound.is_unbounded() |
289 | 0 | || self.start_bound == WindowFrameBound::CurrentRow) |
290 | 0 | && (self.end_bound.is_unbounded() |
291 | 0 | || self.end_bound == WindowFrameBound::CurrentRow) |
292 | 0 | } |
293 | | } |
294 | | |
295 | | /// There are five ways to describe starting and ending frame boundaries: |
296 | | /// |
297 | | /// 1. UNBOUNDED PRECEDING |
298 | | /// 2. `<expr>` PRECEDING |
299 | | /// 3. CURRENT ROW |
300 | | /// 4. `<expr>` FOLLOWING |
301 | | /// 5. UNBOUNDED FOLLOWING |
302 | | /// |
303 | | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] |
304 | | pub enum WindowFrameBound { |
305 | | /// 1. UNBOUNDED PRECEDING |
306 | | /// The frame boundary is the first row in the partition. |
307 | | /// |
308 | | /// 2. `<expr>` PRECEDING |
309 | | /// `<expr>` must be a non-negative constant numeric expression. The boundary is a row that |
310 | | /// is `<expr>` "units" prior to the current row. |
311 | | Preceding(ScalarValue), |
312 | | /// 3. The current row. |
313 | | /// |
314 | | /// For RANGE and GROUPS frame types, peers of the current row are also |
315 | | /// included in the frame, unless specifically excluded by the EXCLUDE clause. |
316 | | /// This is true regardless of whether CURRENT ROW is used as the starting or ending frame |
317 | | /// boundary. |
318 | | CurrentRow, |
319 | | /// 4. `<expr>` FOLLOWING: the same as "`<expr>` PRECEDING" except that the boundary is `<expr>` units after the |
320 | | /// current row rather than before it. |
321 | | /// |
322 | | /// 5. UNBOUNDED FOLLOWING |
323 | | /// The frame boundary is the last row in the partition. |
324 | | Following(ScalarValue), |
325 | | } |
326 | | |
327 | | impl WindowFrameBound { |
328 | 14 | pub fn is_unbounded(&self) -> bool { |
329 | 14 | match self { |
330 | 13 | WindowFrameBound::Preceding(elem) => elem.is_null(), |
331 | 1 | WindowFrameBound::CurrentRow => false, |
332 | 0 | WindowFrameBound::Following(elem) => elem.is_null(), |
333 | | } |
334 | 14 | } |
335 | | } |
336 | | |
337 | | impl TryFrom<ast::WindowFrameBound> for WindowFrameBound { |
338 | | type Error = DataFusionError; |
339 | | |
340 | 0 | fn try_from(value: ast::WindowFrameBound) -> Result<Self> { |
341 | 0 | Ok(match value { |
342 | 0 | ast::WindowFrameBound::Preceding(Some(v)) => { |
343 | 0 | Self::Preceding(convert_frame_bound_to_scalar_value(*v)?) |
344 | | } |
345 | 0 | ast::WindowFrameBound::Preceding(None) => Self::Preceding(ScalarValue::Null), |
346 | 0 | ast::WindowFrameBound::Following(Some(v)) => { |
347 | 0 | Self::Following(convert_frame_bound_to_scalar_value(*v)?) |
348 | | } |
349 | 0 | ast::WindowFrameBound::Following(None) => Self::Following(ScalarValue::Null), |
350 | 0 | ast::WindowFrameBound::CurrentRow => Self::CurrentRow, |
351 | | }) |
352 | 0 | } |
353 | | } |
354 | | |
355 | 0 | pub fn convert_frame_bound_to_scalar_value(v: ast::Expr) -> Result<ScalarValue> { |
356 | 0 | Ok(ScalarValue::Utf8(Some(match v { |
357 | 0 | ast::Expr::Value(ast::Value::Number(value, false)) |
358 | 0 | | ast::Expr::Value(ast::Value::SingleQuotedString(value)) => value, |
359 | | ast::Expr::Interval(ast::Interval { |
360 | 0 | value, |
361 | 0 | leading_field, |
362 | | .. |
363 | | }) => { |
364 | 0 | let result = match *value { |
365 | 0 | ast::Expr::Value(ast::Value::SingleQuotedString(item)) => item, |
366 | 0 | e => { |
367 | 0 | return sql_err!(ParserError(format!( |
368 | 0 | "INTERVAL expression cannot be {e:?}" |
369 | 0 | ))); |
370 | | } |
371 | | }; |
372 | 0 | if let Some(leading_field) = leading_field { |
373 | 0 | format!("{result} {leading_field}") |
374 | | } else { |
375 | 0 | result |
376 | | } |
377 | | } |
378 | 0 | _ => plan_err!( |
379 | 0 | "Invalid window frame: frame offsets must be non negative integers" |
380 | 0 | )?, |
381 | | }))) |
382 | 0 | } |
383 | | |
384 | | impl fmt::Display for WindowFrameBound { |
385 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
386 | 0 | match self { |
387 | 0 | WindowFrameBound::Preceding(n) => { |
388 | 0 | if n.is_null() { |
389 | 0 | f.write_str("UNBOUNDED PRECEDING") |
390 | | } else { |
391 | 0 | write!(f, "{n} PRECEDING") |
392 | | } |
393 | | } |
394 | 0 | WindowFrameBound::CurrentRow => f.write_str("CURRENT ROW"), |
395 | 0 | WindowFrameBound::Following(n) => { |
396 | 0 | if n.is_null() { |
397 | 0 | f.write_str("UNBOUNDED FOLLOWING") |
398 | | } else { |
399 | 0 | write!(f, "{n} FOLLOWING") |
400 | | } |
401 | | } |
402 | | } |
403 | 0 | } |
404 | | } |
405 | | |
406 | | /// There are three frame types: ROWS, GROUPS, and RANGE. The frame type determines how the |
407 | | /// starting and ending boundaries of the frame are measured. |
408 | | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Hash)] |
409 | | pub enum WindowFrameUnits { |
410 | | /// The ROWS frame type means that the starting and ending boundaries for the frame are |
411 | | /// determined by counting individual rows relative to the current row. |
412 | | Rows, |
413 | | /// The RANGE frame type requires that the ORDER BY clause of the window have exactly one |
414 | | /// term. Call that term "X". With the RANGE frame type, the elements of the frame are |
415 | | /// determined by computing the value of expression X for all rows in the partition and framing |
416 | | /// those rows for which the value of X is within a certain range of the value of X for the |
417 | | /// current row. |
418 | | Range, |
419 | | /// The GROUPS frame type means that the starting and ending boundaries are determined |
420 | | /// by counting "groups" relative to the current group. A "group" is a set of rows that all have |
421 | | /// equivalent values for all terms of the window ORDER BY clause. |
422 | | Groups, |
423 | | } |
424 | | |
425 | | impl fmt::Display for WindowFrameUnits { |
426 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
427 | 0 | f.write_str(match self { |
428 | 0 | WindowFrameUnits::Rows => "ROWS", |
429 | 0 | WindowFrameUnits::Range => "RANGE", |
430 | 0 | WindowFrameUnits::Groups => "GROUPS", |
431 | | }) |
432 | 0 | } |
433 | | } |
434 | | |
435 | | impl From<ast::WindowFrameUnits> for WindowFrameUnits { |
436 | 0 | fn from(value: ast::WindowFrameUnits) -> Self { |
437 | 0 | match value { |
438 | 0 | ast::WindowFrameUnits::Range => Self::Range, |
439 | 0 | ast::WindowFrameUnits::Groups => Self::Groups, |
440 | 0 | ast::WindowFrameUnits::Rows => Self::Rows, |
441 | | } |
442 | 0 | } |
443 | | } |
444 | | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the AST form of `<n> PRECEDING` from a numeric literal.
    fn preceding(n: &str) -> ast::WindowFrameBound {
        ast::WindowFrameBound::Preceding(Some(Box::new(ast::Expr::Value(
            ast::Value::Number(n.to_string(), false),
        ))))
    }

    #[test]
    fn test_window_frame_creation() -> Result<()> {
        // UNBOUNDED FOLLOWING is rejected as a start bound.
        let unbounded_following_start = ast::WindowFrame {
            units: ast::WindowFrameUnits::Range,
            start_bound: ast::WindowFrameBound::Following(None),
            end_bound: None,
        };
        let err = WindowFrame::try_from(unbounded_following_start).unwrap_err();
        assert_eq!(
            err.strip_backtrace(),
            "Error during planning: Invalid window frame: start bound cannot be UNBOUNDED FOLLOWING"
        );

        // UNBOUNDED PRECEDING is rejected as an end bound.
        let unbounded_preceding_end = ast::WindowFrame {
            units: ast::WindowFrameUnits::Range,
            start_bound: ast::WindowFrameBound::Preceding(None),
            end_bound: Some(ast::WindowFrameBound::Preceding(None)),
        };
        let err = WindowFrame::try_from(unbounded_preceding_end).unwrap_err();
        assert_eq!(
            err.strip_backtrace(),
            "Error during planning: Invalid window frame: end bound cannot be UNBOUNDED PRECEDING"
        );

        // `ROWS BETWEEN 2 PRECEDING AND 1 PRECEDING` is accepted.
        let bounded = ast::WindowFrame {
            units: ast::WindowFrameUnits::Rows,
            start_bound: preceding("2"),
            end_bound: Some(preceding("1")),
        };
        let result = WindowFrame::try_from(bounded);
        assert!(result.is_ok());
        Ok(())
    }
}