/Users/andrewlamb/Software/datafusion/datafusion/expr-common/src/signature.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Signature module contains foundational types that are used to represent signatures, types, |
19 | | //! and return types of functions in DataFusion. |
20 | | |
21 | | use arrow::datatypes::DataType; |
22 | | |
/// Constant that is used as a placeholder for any valid timezone.
/// This is used where a function can accept a timestamp type with any
/// valid timezone; it exists to avoid the need to enumerate all possible
/// timezones. See [`TypeSignature`] for more details.
///
/// Type coercion always ensures that functions will be executed using
/// timestamp arrays that have a valid time zone. Functions must never
/// return results with this timezone.
pub const TIMEZONE_WILDCARD: &str = "+TZ";

/// Constant that is used as a placeholder for any valid fixed size list.
/// This is used where a function can accept a fixed size list type with any
/// valid length. It exists to avoid the need to enumerate all possible
/// fixed size list lengths.
pub const FIXED_SIZE_LIST_WILDCARD: i32 = i32::MIN;
37 | | |
/// A function's volatility, which defines the function's eligibility for certain optimizations.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Volatility {
    /// An immutable function will always return the same output when given the same
    /// input. DataFusion will attempt to inline immutable functions during planning.
    Immutable,
    /// A stable function may return different values given the same input across different
    /// queries but must return the same value for a given input within a query. An example of
    /// this is the `Now` function. DataFusion will attempt to inline `Stable` functions
    /// during planning, when possible.
    /// For the query `select col1, now() from t1`, execution might take a while, but the
    /// `now()` column will be the same for each output row because it is evaluated
    /// during planning.
    Stable,
    /// A volatile function may change the return value from evaluation to evaluation.
    /// Multiple invocations of a volatile function may return different results when used in the
    /// same query. An example of this is the `random()` function. DataFusion
    /// cannot evaluate such functions during planning.
    /// In the query `select col1, random() from t1`, the `random()` function will be evaluated
    /// for each output row, resulting in a unique random value for each row.
    Volatile,
}
60 | | |
/// A function's type signature defines the types of arguments the function supports.
///
/// Functions typically support only a few different types of arguments compared to the
/// different datatypes in Arrow. To make functions easy to use, when possible DataFusion
/// automatically coerces (adds casts to) function arguments so they match the type signature.
///
/// For example, a function like `cos` may only be implemented for `Float64` arguments. To support a query
/// that calls `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically
/// adds a cast such as `cos(CAST int_column AS DOUBLE)` during planning.
///
/// # Data Types
/// Types to match are represented using Arrow's [`DataType`]. [`DataType::Timestamp`] has an optional variable
/// timezone specification. To specify a function can handle a timestamp with *ANY* timezone, use
/// the [`TIMEZONE_WILDCARD`]. For example:
///
/// ```
/// # use arrow::datatypes::{DataType, TimeUnit};
/// # use datafusion_expr_common::signature::{TIMEZONE_WILDCARD, TypeSignature};
/// let type_signature = TypeSignature::Exact(vec![
///     // A nanosecond precision timestamp with ANY timezone
///     // matches Timestamp(Nanosecond, Some("+0:00"))
///     // matches Timestamp(Nanosecond, Some("+5:00"))
///     // does not match Timestamp(Nanosecond, None)
///     DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())),
/// ]);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum TypeSignature {
    /// One or more arguments of a common type out of a list of valid types.
    ///
    /// # Examples
    /// A function such as `concat` is `Variadic(vec![DataType::Utf8, DataType::LargeUtf8])`
    Variadic(Vec<DataType>),
    /// The acceptable signature and coercion rules to coerce arguments to this
    /// signature are special for this function. If this signature is specified,
    /// DataFusion will call `ScalarUDFImpl::coerce_types` to prepare argument types.
    UserDefined,
    /// One or more arguments with arbitrary types
    VariadicAny,
    /// Fixed number of arguments of an arbitrary but equal type out of a list of valid types.
    ///
    /// # Examples
    /// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])`
    /// 2. A function of one argument of f64 or f32 is `Uniform(1, vec![DataType::Float32, DataType::Float64])`
    Uniform(usize, Vec<DataType>),
    /// Exact number of arguments of an exact type
    Exact(Vec<DataType>),
    /// Fixed number of arguments, each of which can be coerced to the listed target type, in order.
    ///
    /// For example, `Coercible(vec![DataType::Float64])` accepts
    /// arguments like `vec![DataType::Int32]` or `vec![DataType::Float32]`
    /// since i32 and f32 can be cast to f64
    Coercible(Vec<DataType>),
    /// Fixed number of arguments of arbitrary types
    /// If a function takes 0 arguments, its `TypeSignature` should be `Any(0)`
    Any(usize),
    /// Matches exactly one of a list of [`TypeSignature`]s. Coercion is attempted to match
    /// the signatures in order, and stops after the first success, if any.
    ///
    /// # Examples
    /// Function `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature`
    /// is `OneOf(vec![Any(0), VariadicAny])`.
    OneOf(Vec<TypeSignature>),
    /// Specifies signatures for array functions. See [`ArrayFunctionSignature`].
    ArraySignature(ArrayFunctionSignature),
    /// Fixed number of arguments of numeric types.
    /// See <https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#method.is_numeric> to know which type is considered numeric
    Numeric(usize),
}
129 | | |
/// Argument shapes accepted by array-oriented functions.
///
/// The `Display` implementation renders each variant as a short,
/// comma-separated description of the expected arguments
/// (for example `array, element`).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionSignature {
    /// Specialized Signature for ArrayAppend and similar functions.
    /// The first argument should be List/LargeList/FixedSizeList, and the second argument should be non-list or list.
    /// The second argument's list dimension should be one dimension less than the first argument's list dimension.
    /// List dimension of the List/LargeList is equivalent to the number of List.
    /// List dimension of the non-list is 0.
    ArrayAndElement,
    /// Specialized Signature for ArrayPrepend and similar functions.
    /// The first argument should be non-list or list, and the second argument should be List/LargeList.
    /// The first argument's list dimension should be one dimension less than the second argument's list dimension.
    ElementAndArray,
    /// Specialized Signature for Array functions of the form (List/LargeList, Index).
    /// The first argument should be List/LargeList/FixedSizeList, and the second argument should be Int64.
    ArrayAndIndex,
    /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index).
    ArrayAndElementAndOptionalIndex,
    /// Specialized Signature for ArrayEmpty and similar functions.
    /// The function takes a single argument that must be a List/LargeList/FixedSizeList
    /// or something that can be coerced to one of those types.
    Array,
    /// Specialized Signature for MapArray.
    /// The function takes a single argument that must be a MapArray.
    MapArray,
}

impl std::fmt::Display for ArrayFunctionSignature {
    /// Writes the fixed, human-readable label associated with the variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it with a single write.
        let label = match self {
            Self::ArrayAndElement => "array, element",
            Self::ArrayAndElementAndOptionalIndex => "array, element, [index]",
            Self::ElementAndArray => "element, array",
            Self::ArrayAndIndex => "array, index",
            Self::Array => "array",
            Self::MapArray => "map_array",
        };
        f.write_str(label)
    }
}
180 | | |
181 | | impl TypeSignature { |
182 | 0 | pub fn to_string_repr(&self) -> Vec<String> { |
183 | 0 | match self { |
184 | 0 | TypeSignature::Variadic(types) => { |
185 | 0 | vec![format!("{}, ..", Self::join_types(types, "/"))] |
186 | | } |
187 | 0 | TypeSignature::Uniform(arg_count, valid_types) => { |
188 | 0 | vec![std::iter::repeat(Self::join_types(valid_types, "/")) |
189 | 0 | .take(*arg_count) |
190 | 0 | .collect::<Vec<String>>() |
191 | 0 | .join(", ")] |
192 | | } |
193 | 0 | TypeSignature::Numeric(num) => { |
194 | 0 | vec![format!("Numeric({})", num)] |
195 | | } |
196 | 0 | TypeSignature::Exact(types) | TypeSignature::Coercible(types) => { |
197 | 0 | vec![Self::join_types(types, ", ")] |
198 | | } |
199 | 0 | TypeSignature::Any(arg_count) => { |
200 | 0 | vec![std::iter::repeat("Any") |
201 | 0 | .take(*arg_count) |
202 | 0 | .collect::<Vec<&str>>() |
203 | 0 | .join(", ")] |
204 | | } |
205 | | TypeSignature::UserDefined => { |
206 | 0 | vec!["UserDefined".to_string()] |
207 | | } |
208 | 0 | TypeSignature::VariadicAny => vec!["Any, .., Any".to_string()], |
209 | 0 | TypeSignature::OneOf(sigs) => { |
210 | 0 | sigs.iter().flat_map(|s| s.to_string_repr()).collect() |
211 | | } |
212 | 0 | TypeSignature::ArraySignature(array_signature) => { |
213 | 0 | vec![array_signature.to_string()] |
214 | | } |
215 | | } |
216 | 0 | } |
217 | | |
218 | | /// Helper function to join types with specified delimiter. |
219 | 0 | pub fn join_types<T: std::fmt::Display>(types: &[T], delimiter: &str) -> String { |
220 | 0 | types |
221 | 0 | .iter() |
222 | 0 | .map(|t| t.to_string()) |
223 | 0 | .collect::<Vec<String>>() |
224 | 0 | .join(delimiter) |
225 | 0 | } |
226 | | |
227 | | /// Check whether 0 input argument is valid for given `TypeSignature` |
228 | 0 | pub fn supports_zero_argument(&self) -> bool { |
229 | 0 | match &self { |
230 | 0 | TypeSignature::Exact(vec) => vec.is_empty(), |
231 | 0 | TypeSignature::Uniform(0, _) | TypeSignature::Any(0) => true, |
232 | 0 | TypeSignature::OneOf(types) => types |
233 | 0 | .iter() |
234 | 0 | .any(|type_sig| type_sig.supports_zero_argument()), |
235 | 0 | _ => false, |
236 | | } |
237 | 0 | } |
238 | | } |
239 | | |
/// Defines the supported argument types ([`TypeSignature`]) and [`Volatility`] for a function.
///
/// DataFusion will automatically coerce (cast) argument types to one of the supported
/// function signatures, if possible.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub struct Signature {
    /// The data types that the function accepts. See [`TypeSignature`] for more information.
    pub type_signature: TypeSignature,
    /// The volatility of the function. See [`Volatility`] for more information.
    pub volatility: Volatility,
}
251 | | |
252 | | impl Signature { |
253 | | /// Creates a new Signature from a given type signature and volatility. |
254 | 0 | pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self { |
255 | 0 | Signature { |
256 | 0 | type_signature, |
257 | 0 | volatility, |
258 | 0 | } |
259 | 0 | } |
260 | | /// An arbitrary number of arguments with the same type, from those listed in `common_types`. |
261 | 0 | pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self { |
262 | 0 | Self { |
263 | 0 | type_signature: TypeSignature::Variadic(common_types), |
264 | 0 | volatility, |
265 | 0 | } |
266 | 0 | } |
267 | | /// User-defined coercion rules for the function. |
268 | 2 | pub fn user_defined(volatility: Volatility) -> Self { |
269 | 2 | Self { |
270 | 2 | type_signature: TypeSignature::UserDefined, |
271 | 2 | volatility, |
272 | 2 | } |
273 | 2 | } |
274 | | |
275 | | /// A specified number of numeric arguments |
276 | 1 | pub fn numeric(arg_count: usize, volatility: Volatility) -> Self { |
277 | 1 | Self { |
278 | 1 | type_signature: TypeSignature::Numeric(arg_count), |
279 | 1 | volatility, |
280 | 1 | } |
281 | 1 | } |
282 | | |
283 | | /// An arbitrary number of arguments of any type. |
284 | 0 | pub fn variadic_any(volatility: Volatility) -> Self { |
285 | 0 | Self { |
286 | 0 | type_signature: TypeSignature::VariadicAny, |
287 | 0 | volatility, |
288 | 0 | } |
289 | 0 | } |
290 | | /// A fixed number of arguments of the same type, from those listed in `valid_types`. |
291 | 0 | pub fn uniform( |
292 | 0 | arg_count: usize, |
293 | 0 | valid_types: Vec<DataType>, |
294 | 0 | volatility: Volatility, |
295 | 0 | ) -> Self { |
296 | 0 | Self { |
297 | 0 | type_signature: TypeSignature::Uniform(arg_count, valid_types), |
298 | 0 | volatility, |
299 | 0 | } |
300 | 0 | } |
301 | | /// Exactly matches the types in `exact_types`, in order. |
302 | 0 | pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self { |
303 | 0 | Signature { |
304 | 0 | type_signature: TypeSignature::Exact(exact_types), |
305 | 0 | volatility, |
306 | 0 | } |
307 | 0 | } |
308 | | /// Target coerce types in order |
309 | 0 | pub fn coercible(target_types: Vec<DataType>, volatility: Volatility) -> Self { |
310 | 0 | Self { |
311 | 0 | type_signature: TypeSignature::Coercible(target_types), |
312 | 0 | volatility, |
313 | 0 | } |
314 | 0 | } |
315 | | |
316 | | /// A specified number of arguments of any type |
317 | 1 | pub fn any(arg_count: usize, volatility: Volatility) -> Self { |
318 | 1 | Signature { |
319 | 1 | type_signature: TypeSignature::Any(arg_count), |
320 | 1 | volatility, |
321 | 1 | } |
322 | 1 | } |
323 | | /// Any one of a list of [TypeSignature]s. |
324 | 3 | pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self { |
325 | 3 | Signature { |
326 | 3 | type_signature: TypeSignature::OneOf(type_signatures), |
327 | 3 | volatility, |
328 | 3 | } |
329 | 3 | } |
330 | | /// Specialized Signature for ArrayAppend and similar functions |
331 | 0 | pub fn array_and_element(volatility: Volatility) -> Self { |
332 | 0 | Signature { |
333 | 0 | type_signature: TypeSignature::ArraySignature( |
334 | 0 | ArrayFunctionSignature::ArrayAndElement, |
335 | 0 | ), |
336 | 0 | volatility, |
337 | 0 | } |
338 | 0 | } |
339 | | /// Specialized Signature for Array functions with an optional index |
340 | 0 | pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self { |
341 | 0 | Signature { |
342 | 0 | type_signature: TypeSignature::ArraySignature( |
343 | 0 | ArrayFunctionSignature::ArrayAndElementAndOptionalIndex, |
344 | 0 | ), |
345 | 0 | volatility, |
346 | 0 | } |
347 | 0 | } |
348 | | /// Specialized Signature for ArrayPrepend and similar functions |
349 | 0 | pub fn element_and_array(volatility: Volatility) -> Self { |
350 | 0 | Signature { |
351 | 0 | type_signature: TypeSignature::ArraySignature( |
352 | 0 | ArrayFunctionSignature::ElementAndArray, |
353 | 0 | ), |
354 | 0 | volatility, |
355 | 0 | } |
356 | 0 | } |
357 | | /// Specialized Signature for ArrayElement and similar functions |
358 | 0 | pub fn array_and_index(volatility: Volatility) -> Self { |
359 | 0 | Signature { |
360 | 0 | type_signature: TypeSignature::ArraySignature( |
361 | 0 | ArrayFunctionSignature::ArrayAndIndex, |
362 | 0 | ), |
363 | 0 | volatility, |
364 | 0 | } |
365 | 0 | } |
366 | | /// Specialized Signature for ArrayEmpty and similar functions |
367 | 0 | pub fn array(volatility: Volatility) -> Self { |
368 | 0 | Signature { |
369 | 0 | type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array), |
370 | 0 | volatility, |
371 | 0 | } |
372 | 0 | } |
373 | | } |
374 | | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `supports_zero_argument` against signatures that should and
    /// should not accept an empty argument list.
    #[test]
    fn supports_zero_argument_tests() {
        // Signatures for which a zero-argument call is valid.
        let accepting = [
            TypeSignature::Exact(vec![]),
            TypeSignature::Uniform(0, vec![DataType::Float64]),
            TypeSignature::Any(0),
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Any(0),
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
        ];
        for signature in &accepting {
            assert!(
                signature.supports_zero_argument(),
                "Expected {:?} to support zero arguments",
                signature
            );
        }

        // Signatures that require at least one argument.
        let rejecting = [
            TypeSignature::Exact(vec![DataType::Utf8]),
            TypeSignature::Uniform(1, vec![DataType::Float64]),
            TypeSignature::Any(1),
            TypeSignature::VariadicAny,
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
        ];
        for signature in &rejecting {
            assert!(
                !signature.supports_zero_argument(),
                "Expected {:?} not to support zero arguments",
                signature
            );
        }
    }

    /// Validates that partial ordering is defined for `TypeSignature`:
    /// first by variant declaration order, then by the variant's fields.
    #[test]
    fn type_signature_partial_ord() {
        // Ordering between different variants.
        assert!(TypeSignature::UserDefined < TypeSignature::VariadicAny);
        assert!(TypeSignature::UserDefined < TypeSignature::Any(1));

        // Ordering within the same variant compares fields left to right.
        let one_null = TypeSignature::Uniform(1, vec![DataType::Null]);
        let one_boolean = TypeSignature::Uniform(1, vec![DataType::Boolean]);
        let two_nulls = TypeSignature::Uniform(2, vec![DataType::Null]);
        assert!(one_null < one_boolean);
        assert!(one_null < two_nulls);

        // The variant takes precedence over any field value.
        let max_uniform = TypeSignature::Uniform(usize::MAX, vec![DataType::Null]);
        let exact_null = TypeSignature::Exact(vec![DataType::Null]);
        assert!(max_uniform < exact_null);
    }
}