Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/expr/src/built_in_window_function.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Built-in window functions module: contains the definitions of all built-in window functions.
19
20
use std::fmt;
21
use std::str::FromStr;
22
23
use crate::type_coercion::functions::data_types;
24
use crate::utils;
25
use crate::{Signature, TypeSignature, Volatility};
26
use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError, Result};
27
28
use arrow::datatypes::DataType;
29
30
use strum_macros::EnumIter;
31
32
// `Display` renders a window function as the string returned by `name()`.
impl fmt::Display for BuiltInWindowFunction {
33
0
    /// Writes `self.name()` to the formatter `f`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
34
0
        write!(f, "{}", self.name())
35
0
    }
36
}
37
38
/// A [window function] built in to DataFusion.
39
///
40
/// [window function]: https://en.wikipedia.org/wiki/Window_function_(SQL)
41
0
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash, EnumIter)]
42
pub enum BuiltInWindowFunction {
43
    /// Rank of the current row with gaps; same as row_number of its first peer.
44
    Rank,
45
    /// Rank of the current row without gaps; this function counts peer groups.
46
    DenseRank,
47
    /// Relative rank of the current row: (rank - 1) / (total rows - 1).
48
    PercentRank,
49
    /// Relative rank of the current row: (number of rows preceding or peer with current row) / (total rows).
50
    CumeDist,
51
    /// Integer ranging from 1 to the argument value, dividing the partition as equally as possible.
52
    Ntile,
53
    /// Returns value evaluated at the row that is offset rows before the current row within the partition;
54
    /// if there is no such row, instead returns default (which must be of the same type as value).
55
    /// Both offset and default are evaluated with respect to the current row.
56
    /// If omitted, offset defaults to 1 and default to null.
57
    Lag,
58
    /// Returns value evaluated at the row that is offset rows after the current row within the partition;
59
    /// if there is no such row, instead returns default (which must be of the same type as value).
60
    /// Both offset and default are evaluated with respect to the current row.
61
    /// If omitted, offset defaults to 1 and default to null.
62
    Lead,
63
    /// Returns value evaluated at the row that is the first row of the window frame.
64
    FirstValue,
65
    /// Returns value evaluated at the row that is the last row of the window frame.
66
    LastValue,
67
    /// Returns value evaluated at the row that is the nth row of the window frame (counting from 1); null if no such row.
68
    NthValue,
69
}
70
71
impl BuiltInWindowFunction {
72
0
    /// Returns the name of this window function as a static string.
    ///
    /// Most names are upper case; `FirstValue` and `LastValue` are spelled in
    /// lower case. The `FromStr` implementation upper-cases its input before
    /// matching, so every name returned here still parses back to its variant.
    pub fn name(&self) -> &str {
73
        use BuiltInWindowFunction::*;
74
0
        match self {
75
0
            Rank => "RANK",
76
0
            DenseRank => "DENSE_RANK",
77
0
            PercentRank => "PERCENT_RANK",
78
0
            CumeDist => "CUME_DIST",
79
0
            Ntile => "NTILE",
80
0
            Lag => "LAG",
81
0
            Lead => "LEAD",
82
0
            // NOTE(review): lower case unlike the other names -- presumably intentional; confirm before normalizing
            FirstValue => "first_value",
83
0
            LastValue => "last_value",
84
0
            NthValue => "NTH_VALUE",
85
        }
86
0
    }
87
}
88
89
// Parses a window function from its name, ignoring case.
impl FromStr for BuiltInWindowFunction {
90
    type Err = DataFusionError;
91
0
    /// Converts `name` to the matching [`BuiltInWindowFunction`] variant.
    ///
    /// The input is upper-cased before matching, so the lookup is
    /// case-insensitive. If no variant matches, the error built by
    /// `plan_err!` (mentioning the unrecognized `name`) is returned.
    fn from_str(name: &str) -> Result<BuiltInWindowFunction> {
92
0
        Ok(match name.to_uppercase().as_str() {
93
0
            "RANK" => BuiltInWindowFunction::Rank,
94
0
            "DENSE_RANK" => BuiltInWindowFunction::DenseRank,
95
0
            "PERCENT_RANK" => BuiltInWindowFunction::PercentRank,
96
0
            "CUME_DIST" => BuiltInWindowFunction::CumeDist,
97
0
            "NTILE" => BuiltInWindowFunction::Ntile,
98
0
            "LAG" => BuiltInWindowFunction::Lag,
99
0
            "LEAD" => BuiltInWindowFunction::Lead,
100
0
            "FIRST_VALUE" => BuiltInWindowFunction::FirstValue,
101
0
            "LAST_VALUE" => BuiltInWindowFunction::LastValue,
102
0
            "NTH_VALUE" => BuiltInWindowFunction::NthValue,
103
0
            _ => return plan_err!("There is no built-in window function named {name}"),
104
        })
105
0
    }
106
}
107
108
/// Type information for the built-in window functions: the return type and
/// the accepted argument signatures.
109
impl BuiltInWindowFunction {
110
0
    /// Returns the data type produced by this window function for the given
    /// argument types.
    ///
    /// `input_expr_types` is first checked against [`Self::signature`] via
    /// `data_types`; if that check fails, its error is replaced by a single
    /// plan error whose message comes from `utils::generate_signature_error_msg`.
    ///
    /// On success the result is:
    /// * `UInt64` for `Rank`, `DenseRank` and `Ntile`,
    /// * `Float64` for `PercentRank` and `CumeDist`,
    /// * the type of the first argument for `Lag`, `Lead`, `FirstValue`,
    ///   `LastValue` and `NthValue`.
    pub fn return_type(&self, input_expr_types: &[DataType]) -> Result<DataType> {
111
0
        // Note that this function *must* return the same type that the respective physical expression returns
112
0
        // or the execution panics.
113
0
114
0
        // Verify that this is a valid set of data types for this function.
115
0
        data_types(input_expr_types, &self.signature())
116
0
            // The original errors are all related to a wrong function signature;
117
0
            // aggregate them for a better error message.
118
0
            .map_err(|_| {
119
0
                plan_datafusion_err!(
120
0
                    "{}",
121
0
                    utils::generate_signature_error_msg(
122
0
                        &format!("{self}"),
123
0
                        self.signature(),
124
0
                        input_expr_types,
125
0
                    )
126
0
                )
127
0
            })?;
128
129
0
        match self {
130
            BuiltInWindowFunction::Rank
131
            | BuiltInWindowFunction::DenseRank
132
0
            | BuiltInWindowFunction::Ntile => Ok(DataType::UInt64),
133
            BuiltInWindowFunction::PercentRank | BuiltInWindowFunction::CumeDist => {
134
0
                Ok(DataType::Float64)
135
            }
136
            BuiltInWindowFunction::Lag
137
            | BuiltInWindowFunction::Lead
138
            | BuiltInWindowFunction::FirstValue
139
            | BuiltInWindowFunction::LastValue
140
0
            // The signatures of these functions all take at least one argument, so
            // index 0 is presumably always present after the check above -- TODO confirm
            // that `data_types` rejects a wrong argument count.
            | BuiltInWindowFunction::NthValue => Ok(input_expr_types[0].clone()),
141
        }
142
0
    }
143
144
    /// Returns the argument signature(s) supported by this built-in window function.
    ///
    /// * `Rank`, `DenseRank`, `PercentRank`, `CumeDist`: no arguments.
    /// * `Lag`, `Lead`: one, two or three arguments of any type.
    /// * `FirstValue`, `LastValue`: one argument of any type.
    /// * `Ntile`: one argument of a signed or unsigned integer type (8 to 64 bits).
    /// * `NthValue`: two arguments of any type.
    ///
    /// Every signature is declared with `Volatility::Immutable`.
145
0
    pub fn signature(&self) -> Signature {
146
0
        // Note: the physical expression must accept the type returned by this function or the execution panics.
147
0
        match self {
148
            BuiltInWindowFunction::Rank
149
            | BuiltInWindowFunction::DenseRank
150
            | BuiltInWindowFunction::PercentRank
151
0
            | BuiltInWindowFunction::CumeDist => Signature::any(0, Volatility::Immutable),
152
            BuiltInWindowFunction::Lag | BuiltInWindowFunction::Lead => {
153
0
                Signature::one_of(
154
0
                    vec![
155
0
                        TypeSignature::Any(1),
156
0
                        TypeSignature::Any(2),
157
0
                        TypeSignature::Any(3),
158
0
                    ],
159
0
                    Volatility::Immutable,
160
0
                )
161
            }
162
            BuiltInWindowFunction::FirstValue | BuiltInWindowFunction::LastValue => {
163
0
                Signature::any(1, Volatility::Immutable)
164
            }
165
0
            BuiltInWindowFunction::Ntile => Signature::uniform(
166
0
                1,
167
0
                vec![
168
0
                    DataType::UInt64,
169
0
                    DataType::UInt32,
170
0
                    DataType::UInt16,
171
0
                    DataType::UInt8,
172
0
                    DataType::Int64,
173
0
                    DataType::Int32,
174
0
                    DataType::Int16,
175
0
                    DataType::Int8,
176
0
                ],
177
0
                Volatility::Immutable,
178
0
            ),
179
0
            BuiltInWindowFunction::NthValue => Signature::any(2, Volatility::Immutable),
180
        }
181
0
    }
182
}
183
184
#[cfg(test)]
185
mod tests {
186
    use super::*;
187
    use strum::IntoEnumIterator;
188
    #[test]
189
    // Round-trip test for BuiltInWindowFunction's Display and from_str() implementations.
190
    // Each variant of BuiltInWindowFunction is converted to a string with Display
191
    // and then parsed back with from_str(). The test asserts that the original variant
192
    // and the reconstructed variant are equal. This property is also required for
193
    // function suggestion. See https://github.com/apache/datafusion/issues/8082
194
    fn test_display_and_from_str() {
195
        for func_original in BuiltInWindowFunction::iter() {
196
            let func_name = func_original.to_string();
197
            let func_from_str = BuiltInWindowFunction::from_str(&func_name).unwrap();
198
            assert_eq!(func_from_str, func_original);
199
        }
200
    }
201
}