/Users/andrewlamb/Software/datafusion/datafusion/expr/src/built_in_window_function.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Built-in functions module contains all the built-in functions definitions. |
19 | | |
20 | | use std::fmt; |
21 | | use std::str::FromStr; |
22 | | |
23 | | use crate::type_coercion::functions::data_types; |
24 | | use crate::utils; |
25 | | use crate::{Signature, TypeSignature, Volatility}; |
26 | | use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError, Result}; |
27 | | |
28 | | use arrow::datatypes::DataType; |
29 | | |
30 | | use strum_macros::EnumIter; |
31 | | |
32 | | impl fmt::Display for BuiltInWindowFunction { |
33 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
34 | 0 | write!(f, "{}", self.name()) |
35 | 0 | } |
36 | | } |
37 | | |
38 | | /// A [window function] built in to DataFusion |
39 | | /// |
40 | | /// [window function]: https://en.wikipedia.org/wiki/Window_function_(SQL) |
41 | 0 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash, EnumIter)] |
42 | | pub enum BuiltInWindowFunction { |
43 | | /// rank of the current row with gaps; same as row_number of its first peer |
44 | | Rank, |
45 | | /// rank of the current row without gaps; this function counts peer groups |
46 | | DenseRank, |
47 | | /// relative rank of the current row: (rank - 1) / (total rows - 1) |
48 | | PercentRank, |
49 | | /// relative rank of the current row: (number of rows preceding or peer with current row) / (total rows) |
50 | | CumeDist, |
51 | | /// integer ranging from 1 to the argument value, dividing the partition as equally as possible |
52 | | Ntile, |
53 | | /// returns value evaluated at the row that is offset rows before the current row within the partition; |
54 | | /// if there is no such row, instead return default (which must be of the same type as value). |
55 | | /// Both offset and default are evaluated with respect to the current row. |
56 | | /// If omitted, offset defaults to 1 and default to null |
57 | | Lag, |
58 | | /// returns value evaluated at the row that is offset rows after the current row within the partition; |
59 | | /// if there is no such row, instead return default (which must be of the same type as value). |
60 | | /// Both offset and default are evaluated with respect to the current row. |
61 | | /// If omitted, offset defaults to 1 and default to null |
62 | | Lead, |
63 | | /// returns value evaluated at the row that is the first row of the window frame |
64 | | FirstValue, |
65 | | /// returns value evaluated at the row that is the last row of the window frame |
66 | | LastValue, |
67 | | /// returns value evaluated at the row that is the nth row of the window frame (counting from 1); null if no such row |
68 | | NthValue, |
69 | | } |
70 | | |
71 | | impl BuiltInWindowFunction { |
72 | 0 | pub fn name(&self) -> &str { |
73 | | use BuiltInWindowFunction::*; |
74 | 0 | match self { |
75 | 0 | Rank => "RANK", |
76 | 0 | DenseRank => "DENSE_RANK", |
77 | 0 | PercentRank => "PERCENT_RANK", |
78 | 0 | CumeDist => "CUME_DIST", |
79 | 0 | Ntile => "NTILE", |
80 | 0 | Lag => "LAG", |
81 | 0 | Lead => "LEAD", |
82 | 0 | FirstValue => "first_value", |
83 | 0 | LastValue => "last_value", |
84 | 0 | NthValue => "NTH_VALUE", |
85 | | } |
86 | 0 | } |
87 | | } |
88 | | |
89 | | impl FromStr for BuiltInWindowFunction { |
90 | | type Err = DataFusionError; |
91 | 0 | fn from_str(name: &str) -> Result<BuiltInWindowFunction> { |
92 | 0 | Ok(match name.to_uppercase().as_str() { |
93 | 0 | "RANK" => BuiltInWindowFunction::Rank, |
94 | 0 | "DENSE_RANK" => BuiltInWindowFunction::DenseRank, |
95 | 0 | "PERCENT_RANK" => BuiltInWindowFunction::PercentRank, |
96 | 0 | "CUME_DIST" => BuiltInWindowFunction::CumeDist, |
97 | 0 | "NTILE" => BuiltInWindowFunction::Ntile, |
98 | 0 | "LAG" => BuiltInWindowFunction::Lag, |
99 | 0 | "LEAD" => BuiltInWindowFunction::Lead, |
100 | 0 | "FIRST_VALUE" => BuiltInWindowFunction::FirstValue, |
101 | 0 | "LAST_VALUE" => BuiltInWindowFunction::LastValue, |
102 | 0 | "NTH_VALUE" => BuiltInWindowFunction::NthValue, |
103 | 0 | _ => return plan_err!("There is no built-in window function named {name}"), |
104 | | }) |
105 | 0 | } |
106 | | } |
107 | | |
108 | | /// Returns the datatype of the built-in window function |
109 | | impl BuiltInWindowFunction { |
110 | 0 | pub fn return_type(&self, input_expr_types: &[DataType]) -> Result<DataType> { |
111 | 0 | // Note that this function *must* return the same type that the respective physical expression returns |
112 | 0 | // or the execution panics. |
113 | 0 |
|
114 | 0 | // verify that this is a valid set of data types for this function |
115 | 0 | data_types(input_expr_types, &self.signature()) |
116 | 0 | // original errors are all related to wrong function signature |
117 | 0 | // aggregate them for better error message |
118 | 0 | .map_err(|_| { |
119 | 0 | plan_datafusion_err!( |
120 | 0 | "{}", |
121 | 0 | utils::generate_signature_error_msg( |
122 | 0 | &format!("{self}"), |
123 | 0 | self.signature(), |
124 | 0 | input_expr_types, |
125 | 0 | ) |
126 | 0 | ) |
127 | 0 | })?; |
128 | | |
129 | 0 | match self { |
130 | | BuiltInWindowFunction::Rank |
131 | | | BuiltInWindowFunction::DenseRank |
132 | 0 | | BuiltInWindowFunction::Ntile => Ok(DataType::UInt64), |
133 | | BuiltInWindowFunction::PercentRank | BuiltInWindowFunction::CumeDist => { |
134 | 0 | Ok(DataType::Float64) |
135 | | } |
136 | | BuiltInWindowFunction::Lag |
137 | | | BuiltInWindowFunction::Lead |
138 | | | BuiltInWindowFunction::FirstValue |
139 | | | BuiltInWindowFunction::LastValue |
140 | 0 | | BuiltInWindowFunction::NthValue => Ok(input_expr_types[0].clone()), |
141 | | } |
142 | 0 | } |
143 | | |
144 | | /// the signatures supported by the built-in window function `fun`. |
145 | 0 | pub fn signature(&self) -> Signature { |
146 | 0 | // note: the physical expression must accept the type returned by this function or the execution panics. |
147 | 0 | match self { |
148 | | BuiltInWindowFunction::Rank |
149 | | | BuiltInWindowFunction::DenseRank |
150 | | | BuiltInWindowFunction::PercentRank |
151 | 0 | | BuiltInWindowFunction::CumeDist => Signature::any(0, Volatility::Immutable), |
152 | | BuiltInWindowFunction::Lag | BuiltInWindowFunction::Lead => { |
153 | 0 | Signature::one_of( |
154 | 0 | vec![ |
155 | 0 | TypeSignature::Any(1), |
156 | 0 | TypeSignature::Any(2), |
157 | 0 | TypeSignature::Any(3), |
158 | 0 | ], |
159 | 0 | Volatility::Immutable, |
160 | 0 | ) |
161 | | } |
162 | | BuiltInWindowFunction::FirstValue | BuiltInWindowFunction::LastValue => { |
163 | 0 | Signature::any(1, Volatility::Immutable) |
164 | | } |
165 | 0 | BuiltInWindowFunction::Ntile => Signature::uniform( |
166 | 0 | 1, |
167 | 0 | vec![ |
168 | 0 | DataType::UInt64, |
169 | 0 | DataType::UInt32, |
170 | 0 | DataType::UInt16, |
171 | 0 | DataType::UInt8, |
172 | 0 | DataType::Int64, |
173 | 0 | DataType::Int32, |
174 | 0 | DataType::Int16, |
175 | 0 | DataType::Int8, |
176 | 0 | ], |
177 | 0 | Volatility::Immutable, |
178 | 0 | ), |
179 | 0 | BuiltInWindowFunction::NthValue => Signature::any(2, Volatility::Immutable), |
180 | | } |
181 | 0 | } |
182 | | } |
183 | | |
#[cfg(test)]
mod tests {
    use super::*;
    use strum::IntoEnumIterator;

    /// Round-trips every `BuiltInWindowFunction` variant through its
    /// `Display` and `FromStr` implementations.
    ///
    /// Each variant is rendered to a string and parsed back, and the parsed
    /// variant must equal the original. This property is also required for
    /// function suggestion; see
    /// https://github.com/apache/datafusion/issues/8082
    #[test]
    fn test_display_and_from_str() {
        for func_original in BuiltInWindowFunction::iter() {
            let rendered = func_original.to_string();
            let parsed: BuiltInWindowFunction = rendered.parse().unwrap();
            assert_eq!(parsed, func_original);
        }
    }
}