Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/expr-common/src/signature.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Signature module contains foundational types that are used to represent signatures, types,
19
//! and return types of functions in DataFusion.
20
21
use arrow::datatypes::DataType;
22
23
/// Constant that is used as a placeholder for any valid timezone.
24
/// This is used where a function can accept a timestamp type with any
25
/// valid timezone. It exists to avoid the need to enumerate all possible
26
/// timezones. See [`TypeSignature`] for more details.
27
///
28
/// Type coercion always ensures that functions will be executed using
29
/// timestamp arrays that have a valid timezone. Functions must never
30
/// return results with this timezone.
31
pub const TIMEZONE_WILDCARD: &str = "+TZ";
32
33
/// Constant that is used as a placeholder for any valid fixed size list.
34
/// This is used where a function can accept a fixed size list type with any
35
/// valid length. It exists to avoid the need to enumerate all possible fixed size list lengths.
36
pub const FIXED_SIZE_LIST_WILDCARD: i32 = i32::MIN;
37
38
/// A function's volatility, which defines the function's eligibility for certain optimizations.
39
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
40
pub enum Volatility {
41
    /// An immutable function will always return the same output when given the same
42
    /// input. DataFusion will attempt to inline immutable functions during planning.
43
    Immutable,
44
    /// A stable function may return different values given the same input across different
45
    /// queries but must return the same value for a given input within a query. An example of
46
    /// this is the `Now` function. DataFusion will attempt to inline `Stable` functions
47
    /// during planning, when possible.
48
    /// For the query `select col1, now() from t1`, it might take a while to execute but
49
    /// the `now()` column will be the same for each output row, because it is evaluated
50
    /// during planning.
51
    Stable,
52
    /// A volatile function may change the return value from evaluation to evaluation.
53
    /// Multiple invocations of a volatile function may return different results when used in the
54
    /// same query. An example of this is the `random()` function. DataFusion
55
    /// cannot evaluate such functions during planning.
56
    /// In the query `select col1, random() from t1`, the `random()` function will be evaluated
57
    /// for each output row, resulting in a unique random value for each row.
58
    Volatile,
59
}
60
61
/// A function's type signature defines the types of arguments the function supports.
62
///
63
/// Functions typically support only a few different types of arguments compared to the
64
/// different datatypes in Arrow. To make functions easy to use, when possible DataFusion
65
/// automatically coerces (adds casts to) function arguments so they match the type signature.
66
///
67
/// For example, a function like `cos` may only be implemented for `Float64` arguments. To support a query
68
/// that calls `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically
69
/// adds a cast such as `cos(CAST int_column AS DOUBLE)` during planning.
70
///
71
/// # Data Types
72
/// Types to match are represented using Arrow's [`DataType`].  [`DataType::Timestamp`] has an optional variable
73
/// timezone specification. To specify a function can handle a timestamp with *ANY* timezone, use
74
/// the [`TIMEZONE_WILDCARD`]. For example:
75
///
76
/// ```
77
/// # use arrow::datatypes::{DataType, TimeUnit};
78
/// # use datafusion_expr_common::signature::{TIMEZONE_WILDCARD, TypeSignature};
79
/// let type_signature = TypeSignature::Exact(vec![
80
///   // A nanosecond precision timestamp with ANY timezone
81
///   // matches  Timestamp(Nanosecond, Some("+0:00"))
82
///   // matches  Timestamp(Nanosecond, Some("+5:00"))
83
///   // does not match  Timestamp(Nanosecond, None)
84
///   DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())),
85
/// ]);
86
/// ```
87
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
88
pub enum TypeSignature {
89
    /// One or more arguments of a common type out of a list of valid types.
90
    ///
91
    /// # Examples
92
    /// A function such as `concat` is `Variadic(vec![DataType::Utf8, DataType::LargeUtf8])`
93
    Variadic(Vec<DataType>),
94
    /// The acceptable signature and coercion rules to coerce arguments to this
95
    /// signature are special for this function. If this signature is specified,
96
    /// DataFusion will call `ScalarUDFImpl::coerce_types` to prepare argument types.
97
    UserDefined,
98
    /// One or more arguments with arbitrary types
99
    VariadicAny,
100
    /// Fixed number of arguments of an arbitrary but equal type out of a list of valid types.
101
    ///
102
    /// # Examples
103
    /// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])`
104
    /// 2. A function of one argument of f64 or f32 is `Uniform(1, vec![DataType::Float32, DataType::Float64])`
105
    Uniform(usize, Vec<DataType>),
106
    /// Exact number of arguments of an exact type
107
    Exact(Vec<DataType>),
108
    /// Arguments that can be coerced, in order, to the listed target types.
109
    /// For example, `Coercible(vec![DataType::Float64])` accepts
110
    /// arguments like `vec![DataType::Int32]` or `vec![DataType::Float32]`
111
    /// since i32 and f32 can be cast to f64
112
    Coercible(Vec<DataType>),
113
    /// Fixed number of arguments of arbitrary types
114
    /// If a function takes 0 arguments, its `TypeSignature` should be `Any(0)`
115
    Any(usize),
116
    /// Matches exactly one of a list of [`TypeSignature`]s. Coercion is attempted to match
117
    /// the signatures in order, and stops after the first success, if any.
118
    ///
119
    /// # Examples
120
    /// Function `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature`
121
    /// is `OneOf(vec![Any(0), VariadicAny])`.
122
    OneOf(Vec<TypeSignature>),
123
    /// Specifies signatures for array functions. See [`ArrayFunctionSignature`].
124
    ArraySignature(ArrayFunctionSignature),
125
    /// Fixed number of arguments of numeric types.
126
    /// See <https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#method.is_numeric> for which types are considered numeric
127
    Numeric(usize),
128
}
129
130
/// Argument layouts for array functions, carried by [`TypeSignature::ArraySignature`].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
131
pub enum ArrayFunctionSignature {
132
    /// Specialized Signature for ArrayAppend and similar functions
133
    /// The first argument should be List/LargeList/FixedSizeList, and the second argument should be non-list or list.
134
    /// The second argument's list dimension should be one dimension less than the first argument's list dimension.
135
    /// List dimension of the List/LargeList is equivalent to the number of nested Lists.
136
    /// List dimension of the non-list is 0.
137
    ArrayAndElement,
138
    /// Specialized Signature for ArrayPrepend and similar functions
139
    /// The first argument should be non-list or list, and the second argument should be List/LargeList.
140
    /// The first argument's list dimension should be one dimension less than the second argument's list dimension.
141
    ElementAndArray,
142
    /// Specialized Signature for Array functions of the form (List/LargeList, Index)
143
    /// The first argument should be List/LargeList/FixedSizeList, and the second argument should be Int64.
144
    ArrayAndIndex,
145
    /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index)
146
    ArrayAndElementAndOptionalIndex,
147
    /// Specialized Signature for ArrayEmpty and similar functions
148
    /// The function takes a single argument that must be a List/LargeList/FixedSizeList
149
    /// or something that can be coerced to one of those types.
150
    Array,
151
    /// Specialized Signature for MapArray
152
    /// The function takes a single argument that must be a MapArray
153
    MapArray,
154
}
155
156
/// Renders each variant as a short, fixed description of its argument list,
/// e.g. `array, element`; an optional argument is shown in brackets (`[index]`).
impl std::fmt::Display for ArrayFunctionSignature {
157
0
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
158
0
        match self {
159
            ArrayFunctionSignature::ArrayAndElement => {
160
0
                write!(f, "array, element")
161
            }
162
            ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
163
0
                write!(f, "array, element, [index]")
164
            }
165
            ArrayFunctionSignature::ElementAndArray => {
166
0
                write!(f, "element, array")
167
            }
168
            ArrayFunctionSignature::ArrayAndIndex => {
169
0
                write!(f, "array, index")
170
            }
171
            ArrayFunctionSignature::Array => {
172
0
                write!(f, "array")
173
            }
174
            ArrayFunctionSignature::MapArray => {
175
0
                write!(f, "map_array")
176
            }
177
        }
178
0
    }
179
}
180
181
impl TypeSignature {
182
0
    /// Returns human-readable descriptions of the argument lists this signature accepts.
    ///
    /// Every variant except `OneOf` yields exactly one string; `OneOf` yields the
    /// concatenated descriptions of all of its alternatives, in order.
    pub fn to_string_repr(&self) -> Vec<String> {
183
0
        match self {
184
0
            TypeSignature::Variadic(types) => {
185
0
                vec![format!("{}, ..", Self::join_types(types, "/"))]
186
            }
187
0
            TypeSignature::Uniform(arg_count, valid_types) => {
188
0
                vec![std::iter::repeat(Self::join_types(valid_types, "/"))
189
0
                    .take(*arg_count)
190
0
                    .collect::<Vec<String>>()
191
0
                    .join(", ")]
192
            }
193
0
            TypeSignature::Numeric(num) => {
194
0
                vec![format!("Numeric({})", num)]
195
            }
196
0
            TypeSignature::Exact(types) | TypeSignature::Coercible(types) => {
197
0
                vec![Self::join_types(types, ", ")]
198
            }
199
0
            TypeSignature::Any(arg_count) => {
200
0
                vec![std::iter::repeat("Any")
201
0
                    .take(*arg_count)
202
0
                    .collect::<Vec<&str>>()
203
0
                    .join(", ")]
204
            }
205
            TypeSignature::UserDefined => {
206
0
                vec!["UserDefined".to_string()]
207
            }
208
0
            TypeSignature::VariadicAny => vec!["Any, .., Any".to_string()],
209
0
            TypeSignature::OneOf(sigs) => {
210
0
                sigs.iter().flat_map(|s| s.to_string_repr()).collect()
211
            }
212
0
            TypeSignature::ArraySignature(array_signature) => {
213
0
                vec![array_signature.to_string()]
214
            }
215
        }
216
0
    }
217
218
    /// Formats each item with its `Display` implementation and joins the results with `delimiter`.
219
0
    pub fn join_types<T: std::fmt::Display>(types: &[T], delimiter: &str) -> String {
220
0
        types
221
0
            .iter()
222
0
            .map(|t| t.to_string())
223
0
            .collect::<Vec<String>>()
224
0
            .join(delimiter)
225
0
    }
226
227
    /// Returns `true` if zero input arguments are valid for this `TypeSignature`.
    ///
    /// That is the case for an empty `Exact`, `Uniform(0, _)`, `Any(0)`, and a `OneOf`
    /// with at least one such alternative; every other signature returns `false`.
228
0
    pub fn supports_zero_argument(&self) -> bool {
229
0
        match &self {
230
0
            TypeSignature::Exact(vec) => vec.is_empty(),
231
0
            TypeSignature::Uniform(0, _) | TypeSignature::Any(0) => true,
232
0
            TypeSignature::OneOf(types) => types
233
0
                .iter()
234
0
                .any(|type_sig| type_sig.supports_zero_argument()),
235
0
            _ => false,
236
        }
237
0
    }
238
}
239
240
/// Defines the supported argument types ([`TypeSignature`]) and [`Volatility`] for a function.
241
///
242
/// DataFusion will automatically coerce (cast) argument types to one of the supported
243
/// function signatures, if possible.
244
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
245
pub struct Signature {
246
    /// The data types that the function accepts. See [`TypeSignature`] for more information.
247
    pub type_signature: TypeSignature,
248
    /// The volatility of the function. See [`Volatility`] for more information.
249
    pub volatility: Volatility,
250
}
251
252
impl Signature {
253
    /// Creates a new Signature from a given type signature and volatility.
254
0
    pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self {
255
0
        Signature {
256
0
            type_signature,
257
0
            volatility,
258
0
        }
259
0
    }
260
    /// An arbitrary number of arguments with the same type, from those listed in `common_types`.
261
0
    pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self {
262
0
        Self {
263
0
            type_signature: TypeSignature::Variadic(common_types),
264
0
            volatility,
265
0
        }
266
0
    }
267
    /// User-defined coercion rules for the function.
268
2
    pub fn user_defined(volatility: Volatility) -> Self {
269
2
        Self {
270
2
            type_signature: TypeSignature::UserDefined,
271
2
            volatility,
272
2
        }
273
2
    }
274
275
    /// A specified number of numeric arguments
276
1
    pub fn numeric(arg_count: usize, volatility: Volatility) -> Self {
277
1
        Self {
278
1
            type_signature: TypeSignature::Numeric(arg_count),
279
1
            volatility,
280
1
        }
281
1
    }
282
283
    /// An arbitrary number of arguments of any type.
284
0
    pub fn variadic_any(volatility: Volatility) -> Self {
285
0
        Self {
286
0
            type_signature: TypeSignature::VariadicAny,
287
0
            volatility,
288
0
        }
289
0
    }
290
    /// A fixed number of arguments of the same type, from those listed in `valid_types`.
291
0
    pub fn uniform(
292
0
        arg_count: usize,
293
0
        valid_types: Vec<DataType>,
294
0
        volatility: Volatility,
295
0
    ) -> Self {
296
0
        Self {
297
0
            type_signature: TypeSignature::Uniform(arg_count, valid_types),
298
0
            volatility,
299
0
        }
300
0
    }
301
    /// Exactly matches the types in `exact_types`, in order.
302
0
    pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
303
0
        Signature {
304
0
            type_signature: TypeSignature::Exact(exact_types),
305
0
            volatility,
306
0
        }
307
0
    }
308
    /// Arguments that can be coerced, in order, to the types in `target_types`.
309
0
    pub fn coercible(target_types: Vec<DataType>, volatility: Volatility) -> Self {
310
0
        Self {
311
0
            type_signature: TypeSignature::Coercible(target_types),
312
0
            volatility,
313
0
        }
314
0
    }
315
316
    /// A specified number of arguments of any type
317
1
    pub fn any(arg_count: usize, volatility: Volatility) -> Self {
318
1
        Signature {
319
1
            type_signature: TypeSignature::Any(arg_count),
320
1
            volatility,
321
1
        }
322
1
    }
323
    /// Any one of a list of [`TypeSignature`]s.
324
3
    pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self {
325
3
        Signature {
326
3
            type_signature: TypeSignature::OneOf(type_signatures),
327
3
            volatility,
328
3
        }
329
3
    }
330
    /// Specialized Signature for ArrayAppend and similar functions
331
0
    pub fn array_and_element(volatility: Volatility) -> Self {
332
0
        Signature {
333
0
            type_signature: TypeSignature::ArraySignature(
334
0
                ArrayFunctionSignature::ArrayAndElement,
335
0
            ),
336
0
            volatility,
337
0
        }
338
0
    }
339
    /// Specialized Signature for Array functions with an optional index
340
0
    pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
341
0
        Signature {
342
0
            type_signature: TypeSignature::ArraySignature(
343
0
                ArrayFunctionSignature::ArrayAndElementAndOptionalIndex,
344
0
            ),
345
0
            volatility,
346
0
        }
347
0
    }
348
    /// Specialized Signature for ArrayPrepend and similar functions
349
0
    pub fn element_and_array(volatility: Volatility) -> Self {
350
0
        Signature {
351
0
            type_signature: TypeSignature::ArraySignature(
352
0
                ArrayFunctionSignature::ElementAndArray,
353
0
            ),
354
0
            volatility,
355
0
        }
356
0
    }
357
    /// Specialized Signature for ArrayElement and similar functions
358
0
    pub fn array_and_index(volatility: Volatility) -> Self {
359
0
        Signature {
360
0
            type_signature: TypeSignature::ArraySignature(
361
0
                ArrayFunctionSignature::ArrayAndIndex,
362
0
            ),
363
0
            volatility,
364
0
        }
365
0
    }
366
    /// Specialized Signature for ArrayEmpty and similar functions
367
0
    pub fn array(volatility: Volatility) -> Self {
368
0
        Signature {
369
0
            type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
370
0
            volatility,
371
0
        }
372
0
    }
373
}
374
375
#[cfg(test)]
376
mod tests {
377
    use super::*;
378
379
    #[test]
380
    fn supports_zero_argument_tests() {
381
        // `TypeSignature`s that support zero arguments
382
        let positive_cases = vec![
383
            TypeSignature::Exact(vec![]),
384
            TypeSignature::Uniform(0, vec![DataType::Float64]),
385
            TypeSignature::Any(0),
386
            TypeSignature::OneOf(vec![
387
                TypeSignature::Exact(vec![DataType::Int8]),
388
                TypeSignature::Any(0),
389
                TypeSignature::Uniform(1, vec![DataType::Int8]),
390
            ]),
391
        ];
392
393
        for case in positive_cases {
394
            assert!(
395
                case.supports_zero_argument(),
396
                "Expected {:?} to support zero arguments",
397
                case
398
            );
399
        }
400
401
        // `TypeSignature`s that do not support zero arguments
402
        let negative_cases = vec![
403
            TypeSignature::Exact(vec![DataType::Utf8]),
404
            TypeSignature::Uniform(1, vec![DataType::Float64]),
405
            TypeSignature::Any(1),
406
            TypeSignature::VariadicAny,
407
            TypeSignature::OneOf(vec![
408
                TypeSignature::Exact(vec![DataType::Int8]),
409
                TypeSignature::Uniform(1, vec![DataType::Int8]),
410
            ]),
411
        ];
412
413
        for case in negative_cases {
414
            assert!(
415
                !case.supports_zero_argument(),
416
                "Expected {:?} not to support zero arguments",
417
                case
418
            );
419
        }
420
    }
421
422
    #[test]
423
    fn type_signature_partial_ord() {
424
        // Validates the derived `PartialOrd` of `TypeSignature`: variants compare by
        // declaration order first, then by their fields.
425
        assert!(TypeSignature::UserDefined < TypeSignature::VariadicAny);
426
        assert!(TypeSignature::UserDefined < TypeSignature::Any(1));
427
428
        assert!(
429
            TypeSignature::Uniform(1, vec![DataType::Null])
430
                < TypeSignature::Uniform(1, vec![DataType::Boolean])
431
        );
432
        assert!(
433
            TypeSignature::Uniform(1, vec![DataType::Null])
434
                < TypeSignature::Uniform(2, vec![DataType::Null])
435
        );
436
        assert!(
437
            TypeSignature::Uniform(usize::MAX, vec![DataType::Null])
438
                < TypeSignature::Exact(vec![DataType::Null])
439
        );
440
    }
441
}