/Users/andrewlamb/Software/datafusion/datafusion/expr/src/expr.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Logical Expressions: [`Expr`] |
19 | | |
20 | | use std::collections::{HashMap, HashSet}; |
21 | | use std::fmt::{self, Display, Formatter, Write}; |
22 | | use std::hash::{Hash, Hasher}; |
23 | | use std::mem; |
24 | | use std::str::FromStr; |
25 | | use std::sync::Arc; |
26 | | |
27 | | use crate::expr_fn::binary_expr; |
28 | | use crate::logical_plan::Subquery; |
29 | | use crate::utils::expr_to_columns; |
30 | | use crate::Volatility; |
31 | | use crate::{ |
32 | | built_in_window_function, udaf, BuiltInWindowFunction, ExprSchemable, Operator, |
33 | | Signature, WindowFrame, WindowUDF, |
34 | | }; |
35 | | |
36 | | use arrow::datatypes::{DataType, FieldRef}; |
37 | | use datafusion_common::tree_node::{ |
38 | | Transformed, TransformedResult, TreeNode, TreeNodeRecursion, |
39 | | }; |
40 | | use datafusion_common::{ |
41 | | plan_err, Column, DFSchema, Result, ScalarValue, TableReference, |
42 | | }; |
43 | | use datafusion_functions_window_common::field::WindowUDFFieldArgs; |
44 | | use sqlparser::ast::{ |
45 | | display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, |
46 | | NullTreatment, RenameSelectItem, ReplaceSelectElement, |
47 | | }; |
48 | | |
49 | | /// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`. |
50 | | /// |
51 | | /// For example the expression `A + 1` will be represented as |
52 | | /// |
53 | | ///```text |
54 | | /// BinaryExpr { |
55 | | /// left: Expr::Column("A"), |
56 | | /// op: Operator::Plus, |
57 | | /// right: Expr::Literal(ScalarValue::Int32(Some(1))) |
58 | | /// } |
59 | | /// ``` |
60 | | /// |
61 | | /// # Creating Expressions |
62 | | /// |
63 | | /// `Expr`s can be created directly, but it is often easier and less verbose to |
64 | | /// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or |
65 | | /// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`]. |
66 | | /// |
67 | | /// See also [`ExprFunctionExt`] for creating aggregate and window functions. |
68 | | /// |
69 | | /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt |
70 | | /// |
71 | | /// # Schema Access |
72 | | /// |
73 | | /// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability |
74 | | /// of an `Expr`. |
75 | | /// |
76 | | /// # Visiting and Rewriting `Expr`s |
77 | | /// |
78 | | /// The `Expr` struct implements the [`TreeNode`] trait for walking and |
79 | | /// rewriting expressions. For example [`TreeNode::apply`] recursively visits an |
80 | | /// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See |
81 | | /// the examples below and [`TreeNode`] for more information. |
82 | | /// |
83 | | /// # Examples |
84 | | /// |
85 | | /// ## Column references and literals |
86 | | /// |
87 | | /// [`Expr::Column`] refer to the values of columns and are often created with |
88 | | /// the [`col`] function. For example to create an expression `c1` referring to |
89 | | /// column named "c1": |
90 | | /// |
91 | | /// [`col`]: crate::expr_fn::col |
92 | | /// |
93 | | /// ``` |
94 | | /// # use datafusion_common::Column; |
95 | | /// # use datafusion_expr::{lit, col, Expr}; |
96 | | /// let expr = col("c1"); |
97 | | /// assert_eq!(expr, Expr::Column(Column::from_name("c1"))); |
98 | | /// ``` |
99 | | /// |
100 | | /// [`Expr::Literal`] refer to literal, or constant, values. These are created |
101 | | /// with the [`lit`] function. For example to create an expression `42`: |
102 | | /// |
103 | | /// [`lit`]: crate::lit |
104 | | /// |
105 | | /// ``` |
106 | | /// # use datafusion_common::{Column, ScalarValue}; |
107 | | /// # use datafusion_expr::{lit, col, Expr}; |
108 | | /// // All literals are strongly typed in DataFusion. To make an `i64` 42: |
109 | | /// let expr = lit(42i64); |
110 | | /// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)))); |
111 | | /// // To make a (typed) NULL: |
112 | | /// let expr = Expr::Literal(ScalarValue::Int64(None)); |
113 | | /// // to make an (untyped) NULL (the optimizer will coerce this to the correct type): |
114 | | /// let expr = lit(ScalarValue::Null); |
115 | | /// ``` |
116 | | /// |
117 | | /// ## Binary Expressions |
118 | | /// |
119 | | /// Exprs implement traits that allow easy to understand construction of more |
120 | | /// complex expressions. For example, to create `c1 + c2` to add columns "c1" and |
121 | | /// "c2" together: |
122 | | /// |
123 | | /// ``` |
124 | | /// # use datafusion_expr::{lit, col, Operator, Expr}; |
125 | | /// // Use the `+` operator to add two columns together |
126 | | /// let expr = col("c1") + col("c2"); |
127 | | /// assert!(matches!(expr, Expr::BinaryExpr { ..} )); |
128 | | /// if let Expr::BinaryExpr(binary_expr) = expr { |
129 | | /// assert_eq!(*binary_expr.left, col("c1")); |
130 | | /// assert_eq!(*binary_expr.right, col("c2")); |
131 | | /// assert_eq!(binary_expr.op, Operator::Plus); |
132 | | /// } |
133 | | /// ``` |
134 | | /// |
135 | | /// The expression `c1 = 42` compares the value in column "c1" to the |
136 | | /// literal value `42`: |
137 | | /// |
138 | | /// ``` |
139 | | /// # use datafusion_common::ScalarValue; |
140 | | /// # use datafusion_expr::{lit, col, Operator, Expr}; |
141 | | /// let expr = col("c1").eq(lit(42_i32)); |
142 | | /// assert!(matches!(expr, Expr::BinaryExpr { .. } )); |
143 | | /// if let Expr::BinaryExpr(binary_expr) = expr { |
144 | | /// assert_eq!(*binary_expr.left, col("c1")); |
145 | | /// let scalar = ScalarValue::Int32(Some(42)); |
146 | | /// assert_eq!(*binary_expr.right, Expr::Literal(scalar)); |
147 | | /// assert_eq!(binary_expr.op, Operator::Eq); |
148 | | /// } |
149 | | /// ``` |
150 | | /// |
151 | | /// Here is how to implement the equivalent of `SELECT *` to select all |
152 | | /// [`Expr::Column`] from a [`DFSchema`]'s columns: |
153 | | /// |
154 | | /// ``` |
155 | | /// # use arrow::datatypes::{DataType, Field, Schema}; |
156 | | /// # use datafusion_common::{DFSchema, Column}; |
157 | | /// # use datafusion_expr::Expr; |
158 | | /// // Create a schema (c1 int, c2 float) |
159 | | /// let arrow_schema = Schema::new(vec![ |
160 | | /// Field::new("c1", DataType::Int32, false), |
161 | | /// Field::new("c2", DataType::Float64, false), |
162 | | /// ]); |
163 | | /// // DFSchema is an Arrow schema with optional relation name |
164 | | /// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema) |
165 | | /// .unwrap(); |
166 | | /// |
167 | | /// // Form Vec<Expr> with an expression for each column in the schema |
168 | | /// let exprs: Vec<_> = df_schema.iter() |
169 | | /// .map(Expr::from) |
170 | | /// .collect(); |
171 | | /// |
172 | | /// assert_eq!(exprs, vec![ |
173 | | /// Expr::from(Column::from_qualified_name("t1.c1")), |
174 | | /// Expr::from(Column::from_qualified_name("t1.c2")), |
175 | | /// ]); |
176 | | /// ``` |
177 | | /// |
178 | | /// ## Visiting and Rewriting `Expr`s |
179 | | /// |
180 | | /// Here is an example that finds all literals in an `Expr` tree: |
181 | | /// ``` |
182 | | /// # use std::collections::{HashSet}; |
183 | | /// use datafusion_common::ScalarValue; |
184 | | /// # use datafusion_expr::{col, Expr, lit}; |
185 | | /// use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; |
186 | | /// // Expression a = 5 AND b = 6 |
187 | | /// let expr = col("a").eq(lit(5)) & col("b").eq(lit(6)); |
188 | | /// // find all literals and collect them in a HashSet |
189 | | /// let mut scalars = HashSet::new(); |
190 | | /// // apply recursively visits all nodes in the expression tree |
191 | | /// expr.apply(|e| { |
192 | | /// if let Expr::Literal(scalar) = e { |
193 | | /// scalars.insert(scalar); |
194 | | /// } |
195 | | /// // The return value controls whether to continue visiting the tree |
196 | | /// Ok(TreeNodeRecursion::Continue) |
197 | | /// }).unwrap(); |
198 | | /// // All subtrees have been visited and literals found |
199 | | /// assert_eq!(scalars.len(), 2); |
200 | | /// assert!(scalars.contains(&ScalarValue::Int32(Some(5)))); |
201 | | /// assert!(scalars.contains(&ScalarValue::Int32(Some(6)))); |
202 | | /// ``` |
203 | | /// |
204 | | /// Rewrite an expression, replacing references to column "a" in an |
205 | | /// expression with the literal `42`: |
206 | | /// |
207 | | /// ``` |
208 | | /// # use datafusion_common::tree_node::{Transformed, TreeNode}; |
209 | | /// # use datafusion_expr::{col, Expr, lit}; |
210 | | /// // expression a = 5 AND b = 6 |
211 | | /// let expr = col("a").eq(lit(5)).and(col("b").eq(lit(6))); |
212 | | /// // rewrite all references to column "a" to the literal 42 |
213 | | /// let rewritten = expr.transform(|e| { |
214 | | /// if let Expr::Column(c) = &e { |
215 | | /// if &c.name == "a" { |
216 | | /// // return Transformed::yes to indicate the node was changed |
217 | | /// return Ok(Transformed::yes(lit(42))) |
218 | | /// } |
219 | | /// } |
220 | | /// // return Transformed::no to indicate the node was not changed |
221 | | /// Ok(Transformed::no(e)) |
222 | | /// }).unwrap(); |
223 | | /// // The expression has been rewritten |
224 | | /// assert!(rewritten.transformed); |
225 | | /// // to 42 = 5 AND b = 6 |
226 | | /// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6)))); |
227 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
228 | | pub enum Expr { |
229 | | /// An expression with a specific name. |
230 | | Alias(Alias), |
231 | | /// A named reference to a qualified field in a schema. |
232 | | Column(Column), |
233 | | /// A named reference to a variable in a registry. |
234 | | ScalarVariable(DataType, Vec<String>), |
235 | | /// A constant value. |
236 | | Literal(ScalarValue), |
237 | | /// A binary expression such as "age > 21" |
238 | | BinaryExpr(BinaryExpr), |
239 | | /// LIKE expression |
240 | | Like(Like), |
241 | | /// LIKE expression that uses regular expressions |
242 | | SimilarTo(Like), |
243 | | /// Negation of an expression. The expression's type must be a boolean to make sense. |
244 | | Not(Box<Expr>), |
245 | | /// True if argument is not NULL, false otherwise. This expression itself is never NULL. |
246 | | IsNotNull(Box<Expr>), |
247 | | /// True if argument is NULL, false otherwise. This expression itself is never NULL. |
248 | | IsNull(Box<Expr>), |
249 | | /// True if argument is true, false otherwise. This expression itself is never NULL. |
250 | | IsTrue(Box<Expr>), |
251 | | /// True if argument is false, false otherwise. This expression itself is never NULL. |
252 | | IsFalse(Box<Expr>), |
253 | | /// True if argument is NULL, false otherwise. This expression itself is never NULL. |
254 | | IsUnknown(Box<Expr>), |
255 | | /// True if argument is FALSE or NULL, false otherwise. This expression itself is never NULL. |
256 | | IsNotTrue(Box<Expr>), |
257 | | /// True if argument is TRUE OR NULL, false otherwise. This expression itself is never NULL. |
258 | | IsNotFalse(Box<Expr>), |
259 | | /// True if argument is TRUE or FALSE, false otherwise. This expression itself is never NULL. |
260 | | IsNotUnknown(Box<Expr>), |
261 | | /// arithmetic negation of an expression, the operand must be of a signed numeric data type |
262 | | Negative(Box<Expr>), |
263 | | /// Whether an expression is between a given range. |
264 | | Between(Between), |
265 | | /// The CASE expression is similar to a series of nested if/else and there are two forms that |
266 | | /// can be used. The first form consists of a series of boolean "when" expressions with |
267 | | /// corresponding "then" expressions, and an optional "else" expression. |
268 | | /// |
269 | | /// ```text |
270 | | /// CASE WHEN condition THEN result |
271 | | /// [WHEN ...] |
272 | | /// [ELSE result] |
273 | | /// END |
274 | | /// ``` |
275 | | /// |
276 | | /// The second form uses a base expression and then a series of "when" clauses that match on a |
277 | | /// literal value. |
278 | | /// |
279 | | /// ```text |
280 | | /// CASE expression |
281 | | /// WHEN value THEN result |
282 | | /// [WHEN ...] |
283 | | /// [ELSE result] |
284 | | /// END |
285 | | /// ``` |
286 | | Case(Case), |
287 | | /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast. |
288 | | /// This expression is guaranteed to have a fixed type. |
289 | | Cast(Cast), |
290 | | /// Casts the expression to a given type and will return a null value if the expression cannot be cast. |
291 | | /// This expression is guaranteed to have a fixed type. |
292 | | TryCast(TryCast), |
293 | | /// Represents the call of a scalar function with a set of arguments. |
294 | | ScalarFunction(ScalarFunction), |
295 | | /// Calls an aggregate function with arguments, and optional |
296 | | /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`. |
297 | | /// |
298 | | /// See also [`ExprFunctionExt`] to set these fields. |
299 | | /// |
300 | | /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt |
301 | | AggregateFunction(AggregateFunction), |
302 | | /// Represents the call of a window function with arguments. |
303 | | WindowFunction(WindowFunction), |
304 | | /// Returns whether the list contains the expr value. |
305 | | InList(InList), |
306 | | /// EXISTS subquery |
307 | | Exists(Exists), |
308 | | /// IN subquery |
309 | | InSubquery(InSubquery), |
310 | | /// Scalar subquery |
311 | | ScalarSubquery(Subquery), |
312 | | /// Represents a reference to all available fields in a specific schema, |
313 | | /// with an optional (schema) qualifier. |
314 | | /// |
315 | | /// This expr has to be resolved to a list of columns before translating logical |
316 | | /// plan into physical plan. |
317 | | Wildcard { |
318 | | qualifier: Option<TableReference>, |
319 | | options: WildcardOptions, |
320 | | }, |
321 | | /// List of grouping set expressions. Only valid in the context of an aggregate |
322 | | /// GROUP BY expression list |
323 | | GroupingSet(GroupingSet), |
324 | | /// A place holder for parameters in a prepared statement |
325 | | /// (e.g. `$foo` or `$1`) |
326 | | Placeholder(Placeholder), |
327 | | /// A place holder which hold a reference to a qualified field |
328 | | /// in the outer query, used for correlated sub queries. |
329 | | OuterReferenceColumn(DataType, Column), |
330 | | /// Unnest expression |
331 | | Unnest(Unnest), |
332 | | } |
333 | | |
334 | | impl Default for Expr { |
335 | 0 | fn default() -> Self { |
336 | 0 | Expr::Literal(ScalarValue::Null) |
337 | 0 | } |
338 | | } |
339 | | |
340 | | /// Create an [`Expr`] from a [`Column`] |
341 | | impl From<Column> for Expr { |
342 | 0 | fn from(value: Column) -> Self { |
343 | 0 | Expr::Column(value) |
344 | 0 | } |
345 | | } |
346 | | |
347 | | /// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is |
348 | | /// useful for creating [`Expr`] from a [`DFSchema`]. |
349 | | /// |
350 | | /// See example on [`Expr`] |
351 | | impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr { |
352 | 0 | fn from(value: (Option<&'a TableReference>, &'a FieldRef)) -> Self { |
353 | 0 | Expr::from(Column::from(value)) |
354 | 0 | } |
355 | | } |
356 | | |
357 | | /// UNNEST expression. |
358 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
359 | | pub struct Unnest { |
360 | | pub expr: Box<Expr>, |
361 | | } |
362 | | |
363 | | impl Unnest { |
364 | | /// Create a new Unnest expression, boxing the given `expr`. |
365 | 0 | pub fn new(expr: Expr) -> Self { |
366 | 0 | Self { |
367 | 0 | expr: Box::new(expr), |
368 | 0 | } |
369 | 0 | } |
370 | | |
371 | | /// Create a new Unnest expression from an already boxed [`Expr`] (no re-boxing). |
372 | 0 | pub fn new_boxed(boxed: Box<Expr>) -> Self { |
373 | 0 | Self { expr: boxed } |
374 | 0 | } |
375 | | } |
376 | | |
377 | | /// Alias expression |
378 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
379 | | pub struct Alias { |
380 | | pub expr: Box<Expr>, |
381 | | pub relation: Option<TableReference>, |
382 | | pub name: String, |
383 | | } |
384 | | |
385 | | impl Alias { |
386 | | /// Create an alias with an optional schema/field qualifier. |
387 | 0 | pub fn new( |
388 | 0 | expr: Expr, |
389 | 0 | relation: Option<impl Into<TableReference>>, |
390 | 0 | name: impl Into<String>, |
391 | 0 | ) -> Self { |
392 | 0 | Self { |
393 | 0 | expr: Box::new(expr), |
394 | 0 | relation: relation.map(|r| r.into()), |
395 | 0 | name: name.into(), |
396 | 0 | } |
397 | 0 | } |
398 | | } |
399 | | |
400 | | /// Binary expression: two operand expressions combined by an [`Operator`], such as `A + 1` or `age > 21` |
401 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
402 | | pub struct BinaryExpr { |
403 | | /// Left-hand side of the expression |
404 | | pub left: Box<Expr>, |
405 | | /// The operator applied to `left` and `right`; not necessarily a comparison (e.g. `Operator::Plus`) |
406 | | pub op: Operator, |
407 | | /// Right-hand side of the expression |
408 | | pub right: Box<Expr>, |
409 | | } |
410 | | |
411 | | impl BinaryExpr { |
412 | | /// Create a new binary expression |
413 | 0 | pub fn new(left: Box<Expr>, op: Operator, right: Box<Expr>) -> Self { |
414 | 0 | Self { left, op, right } |
415 | 0 | } |
416 | | } |
417 | | |
418 | | impl Display for BinaryExpr { |
419 | 0 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { |
420 | | // Put parentheses around child binary expressions so that we can see the difference |
421 | | // between `(a OR b) AND c` and `a OR (b AND c)`. We only insert parentheses when needed, |
422 | | // based on operator precedence. For example, `(a AND b) OR c` and `a AND b OR c` are |
423 | | // equivalent and the parentheses are not necessary. |
424 | | |
425 | 0 | fn write_child( |
426 | 0 | f: &mut Formatter<'_>, |
427 | 0 | expr: &Expr, |
428 | 0 | precedence: u8, |
429 | 0 | ) -> fmt::Result { |
430 | 0 | match expr { |
431 | 0 | Expr::BinaryExpr(child) => { |
432 | 0 | let p = child.op.precedence(); |
433 | 0 | if p == 0 || p < precedence { |
434 | 0 | write!(f, "({child})")?; |
435 | | } else { |
436 | 0 | write!(f, "{child}")?; |
437 | | } |
438 | | } |
439 | 0 | _ => write!(f, "{expr}")?, |
440 | | } |
441 | 0 | Ok(()) |
442 | 0 | } |
443 | | |
444 | 0 | let precedence = self.op.precedence(); |
445 | 0 | write_child(f, self.left.as_ref(), precedence)?; |
446 | 0 | write!(f, " {} ", self.op)?; |
447 | 0 | write_child(f, self.right.as_ref(), precedence) |
448 | 0 | } |
449 | | } |
450 | | |
451 | | /// CASE expression |
452 | | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)] |
453 | | pub struct Case { |
454 | | /// Optional base expression that can be compared to literal values in the "when" expressions |
455 | | pub expr: Option<Box<Expr>>, |
456 | | /// One or more when/then expressions |
457 | | pub when_then_expr: Vec<(Box<Expr>, Box<Expr>)>, |
458 | | /// Optional "else" expression |
459 | | pub else_expr: Option<Box<Expr>>, |
460 | | } |
461 | | |
462 | | impl Case { |
463 | | /// Create a new Case expression |
464 | 0 | pub fn new( |
465 | 0 | expr: Option<Box<Expr>>, |
466 | 0 | when_then_expr: Vec<(Box<Expr>, Box<Expr>)>, |
467 | 0 | else_expr: Option<Box<Expr>>, |
468 | 0 | ) -> Self { |
469 | 0 | Self { |
470 | 0 | expr, |
471 | 0 | when_then_expr, |
472 | 0 | else_expr, |
473 | 0 | } |
474 | 0 | } |
475 | | } |
476 | | |
477 | | /// LIKE expression |
478 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
479 | | pub struct Like { |
480 | | pub negated: bool, |
481 | | pub expr: Box<Expr>, |
482 | | pub pattern: Box<Expr>, |
483 | | pub escape_char: Option<char>, |
484 | | /// Whether to ignore case on comparing |
485 | | pub case_insensitive: bool, |
486 | | } |
487 | | |
488 | | impl Like { |
489 | | /// Create a new Like expression |
490 | 0 | pub fn new( |
491 | 0 | negated: bool, |
492 | 0 | expr: Box<Expr>, |
493 | 0 | pattern: Box<Expr>, |
494 | 0 | escape_char: Option<char>, |
495 | 0 | case_insensitive: bool, |
496 | 0 | ) -> Self { |
497 | 0 | Self { |
498 | 0 | negated, |
499 | 0 | expr, |
500 | 0 | pattern, |
501 | 0 | escape_char, |
502 | 0 | case_insensitive, |
503 | 0 | } |
504 | 0 | } |
505 | | } |
506 | | |
507 | | /// BETWEEN expression |
508 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
509 | | pub struct Between { |
510 | | /// The value to compare |
511 | | pub expr: Box<Expr>, |
512 | | /// Whether the expression is negated |
513 | | pub negated: bool, |
514 | | /// The low end of the range |
515 | | pub low: Box<Expr>, |
516 | | /// The high end of the range |
517 | | pub high: Box<Expr>, |
518 | | } |
519 | | |
520 | | impl Between { |
521 | | /// Create a new Between expression |
522 | 0 | pub fn new(expr: Box<Expr>, negated: bool, low: Box<Expr>, high: Box<Expr>) -> Self { |
523 | 0 | Self { |
524 | 0 | expr, |
525 | 0 | negated, |
526 | 0 | low, |
527 | 0 | high, |
528 | 0 | } |
529 | 0 | } |
530 | | } |
531 | | |
532 | | /// ScalarFunction expression: a call to a scalar function ([`crate::ScalarUDF`]) with a list of arguments |
533 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
534 | | pub struct ScalarFunction { |
535 | | /// The scalar function to call |
536 | | pub func: Arc<crate::ScalarUDF>, |
537 | | /// List of expressions to feed to the function as arguments |
538 | | pub args: Vec<Expr>, |
539 | | } |
540 | | |
541 | | impl ScalarFunction { |
542 | | /// Returns the function's name, as reported by the underlying `func` |
543 | 0 | pub fn name(&self) -> &str { |
544 | 0 | self.func.name() |
545 | 0 | } |
546 | | } |
547 | | |
548 | | impl ScalarFunction { |
549 | | /// Create a new ScalarFunction expression with a user-defined function (UDF) |
550 | 0 | pub fn new_udf(udf: Arc<crate::ScalarUDF>, args: Vec<Expr>) -> Self { |
551 | 0 | Self { func: udf, args } |
552 | 0 | } |
553 | | } |
554 | | |
555 | | /// Access a sub field of a nested type, such as `Field` or `List` |
556 | | #[derive(Clone, PartialEq, Eq, Hash, Debug)] |
557 | | pub enum GetFieldAccess { |
558 | | /// Named field, for example `struct["name"]` |
559 | | NamedStructField { name: ScalarValue }, |
560 | | /// Single list index, for example: `list[i]` |
561 | | ListIndex { key: Box<Expr> }, |
562 | | /// List stride, for example `list[i:j:k]` |
563 | | ListRange { |
564 | | start: Box<Expr>, |
565 | | stop: Box<Expr>, |
566 | | stride: Box<Expr>, |
567 | | }, |
568 | | } |
569 | | |
570 | | /// Cast expression |
571 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
572 | | pub struct Cast { |
573 | | /// The expression being cast |
574 | | pub expr: Box<Expr>, |
575 | | /// The `DataType` the expression will yield |
576 | | pub data_type: DataType, |
577 | | } |
578 | | |
579 | | impl Cast { |
580 | | /// Create a new Cast expression |
581 | 0 | pub fn new(expr: Box<Expr>, data_type: DataType) -> Self { |
582 | 0 | Self { expr, data_type } |
583 | 0 | } |
584 | | } |
585 | | |
586 | | /// TryCast Expression |
587 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
588 | | pub struct TryCast { |
589 | | /// The expression being cast |
590 | | pub expr: Box<Expr>, |
591 | | /// The `DataType` the expression will yield |
592 | | pub data_type: DataType, |
593 | | } |
594 | | |
595 | | impl TryCast { |
596 | | /// Create a new TryCast expression |
597 | 0 | pub fn new(expr: Box<Expr>, data_type: DataType) -> Self { |
598 | 0 | Self { expr, data_type } |
599 | 0 | } |
600 | | } |
601 | | |
602 | | /// SORT expression |
603 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
604 | | pub struct Sort { |
605 | | /// The expression to sort on |
606 | | pub expr: Expr, |
607 | | /// The direction of the sort |
608 | | pub asc: bool, |
609 | | /// Whether to put Nulls before all other data values |
610 | | pub nulls_first: bool, |
611 | | } |
612 | | |
613 | | impl Sort { |
614 | | /// Create a new Sort expression from `expr`, a direction (`asc`) and a NULL placement (`nulls_first`) |
615 | 0 | pub fn new(expr: Expr, asc: bool, nulls_first: bool) -> Self { |
616 | 0 | Self { |
617 | 0 | expr, |
618 | 0 | asc, |
619 | 0 | nulls_first, |
620 | 0 | } |
621 | 0 | } |
622 | | |
623 | | /// Create a new Sort expression on a clone of `expr` with BOTH `asc` and `nulls_first` inverted |
624 | 0 | pub fn reverse(&self) -> Self { |
625 | 0 | Self { |
626 | 0 | expr: self.expr.clone(), |
627 | 0 | asc: !self.asc, |
628 | 0 | nulls_first: !self.nulls_first, |
629 | 0 | } |
630 | 0 | } |
631 | | } |
632 | | |
633 | | impl Display for Sort { |
634 | 0 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { |
635 | 0 | write!(f, "{}", self.expr)?; |
636 | 0 | if self.asc { |
637 | 0 | write!(f, " ASC")?; |
638 | | } else { |
639 | 0 | write!(f, " DESC")?; |
640 | | } |
641 | 0 | if self.nulls_first { |
642 | 0 | write!(f, " NULLS FIRST")?; |
643 | | } else { |
644 | 0 | write!(f, " NULLS LAST")?; |
645 | | } |
646 | 0 | Ok(()) |
647 | 0 | } |
648 | | } |
649 | | |
650 | | /// Aggregate function call: the function, its arguments, and optional `DISTINCT`, filter, ordering and `null_treatment` |
651 | | /// |
652 | | /// See also [`ExprFunctionExt`] to set these fields on `Expr` |
653 | | /// |
654 | | /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt |
655 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
656 | | pub struct AggregateFunction { |
657 | | /// The aggregate function to call |
658 | | pub func: Arc<crate::AggregateUDF>, |
659 | | /// List of expressions to feed to the function as arguments |
660 | | pub args: Vec<Expr>, |
661 | | /// Whether this is a DISTINCT aggregation or not |
662 | | pub distinct: bool, |
663 | | /// Optional filter |
664 | | pub filter: Option<Box<Expr>>, |
665 | | /// Optional ordering |
666 | | pub order_by: Option<Vec<Sort>>, |
667 | | pub null_treatment: Option<NullTreatment>, |
668 | | } |
669 | | |
670 | | impl AggregateFunction { |
671 | | /// Create a new AggregateFunction expression with a user-defined function (UDF) |
672 | 0 | pub fn new_udf( |
673 | 0 | func: Arc<crate::AggregateUDF>, |
674 | 0 | args: Vec<Expr>, |
675 | 0 | distinct: bool, |
676 | 0 | filter: Option<Box<Expr>>, |
677 | 0 | order_by: Option<Vec<Sort>>, |
678 | 0 | null_treatment: Option<NullTreatment>, |
679 | 0 | ) -> Self { |
680 | 0 | Self { |
681 | 0 | func, |
682 | 0 | args, |
683 | 0 | distinct, |
684 | 0 | filter, |
685 | 0 | order_by, |
686 | 0 | null_treatment, |
687 | 0 | } |
688 | 0 | } |
689 | | } |
690 | | |
691 | | /// The function invoked by a [`WindowFunction`] expression. |
692 | | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] |
693 | | /// Defines which implementation of a window function DataFusion should call. |
694 | | pub enum WindowFunctionDefinition { |
695 | | /// A built-in window function |
696 | | /// (see [`built_in_window_function::BuiltInWindowFunction`]) |
697 | | BuiltInWindowFunction(built_in_window_function::BuiltInWindowFunction), |
698 | | /// A user defined aggregate function, used as a window function |
699 | | AggregateUDF(Arc<crate::AggregateUDF>), |
700 | | /// A user defined window function |
701 | | WindowUDF(Arc<crate::WindowUDF>), |
702 | | } |
703 | | |
704 | | impl WindowFunctionDefinition { |
705 | | /// Returns the datatype of the window function |
706 | 0 | pub fn return_type( |
707 | 0 | &self, |
708 | 0 | input_expr_types: &[DataType], |
709 | 0 | _input_expr_nullable: &[bool], |
710 | 0 | display_name: &str, |
711 | 0 | ) -> Result<DataType> { |
712 | 0 | match self { |
713 | 0 | WindowFunctionDefinition::BuiltInWindowFunction(fun) => { |
714 | 0 | fun.return_type(input_expr_types) |
715 | | } |
716 | 0 | WindowFunctionDefinition::AggregateUDF(fun) => { |
717 | 0 | fun.return_type(input_expr_types) |
718 | | } |
719 | 0 | WindowFunctionDefinition::WindowUDF(fun) => fun |
720 | 0 | .field(WindowUDFFieldArgs::new(input_expr_types, display_name)) |
721 | 0 | .map(|field| field.data_type().clone()), |
722 | | } |
723 | 0 | } |
724 | | |
725 | | /// the signatures supported by the function `fun`. |
726 | 0 | pub fn signature(&self) -> Signature { |
727 | 0 | match self { |
728 | 0 | WindowFunctionDefinition::BuiltInWindowFunction(fun) => fun.signature(), |
729 | 0 | WindowFunctionDefinition::AggregateUDF(fun) => fun.signature().clone(), |
730 | 0 | WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(), |
731 | | } |
732 | 0 | } |
733 | | |
734 | | /// Function's name for display |
735 | 0 | pub fn name(&self) -> &str { |
736 | 0 | match self { |
737 | 0 | WindowFunctionDefinition::BuiltInWindowFunction(fun) => fun.name(), |
738 | 0 | WindowFunctionDefinition::WindowUDF(fun) => fun.name(), |
739 | 0 | WindowFunctionDefinition::AggregateUDF(fun) => fun.name(), |
740 | | } |
741 | 0 | } |
742 | | } |
743 | | |
744 | | impl fmt::Display for WindowFunctionDefinition { |
745 | 1 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
746 | 1 | match self { |
747 | 0 | WindowFunctionDefinition::BuiltInWindowFunction(fun) => { |
748 | 0 | std::fmt::Display::fmt(fun, f) |
749 | | } |
750 | 1 | WindowFunctionDefinition::AggregateUDF(fun) => std::fmt::Display::fmt(fun, f), |
751 | 0 | WindowFunctionDefinition::WindowUDF(fun) => std::fmt::Display::fmt(fun, f), |
752 | | } |
753 | 1 | } |
754 | | } |
755 | | |
756 | | impl From<BuiltInWindowFunction> for WindowFunctionDefinition { |
757 | 0 | fn from(value: BuiltInWindowFunction) -> Self { |
758 | 0 | Self::BuiltInWindowFunction(value) |
759 | 0 | } |
760 | | } |
761 | | |
762 | | impl From<Arc<crate::AggregateUDF>> for WindowFunctionDefinition { |
763 | 0 | fn from(value: Arc<crate::AggregateUDF>) -> Self { |
764 | 0 | Self::AggregateUDF(value) |
765 | 0 | } |
766 | | } |
767 | | |
768 | | impl From<Arc<WindowUDF>> for WindowFunctionDefinition { |
769 | 0 | fn from(value: Arc<WindowUDF>) -> Self { |
770 | 0 | Self::WindowUDF(value) |
771 | 0 | } |
772 | | } |
773 | | |
774 | | /// Window function |
775 | | /// |
776 | | /// Holds the actual function to call ([`WindowFunctionDefinition`]) as well as its |
777 | | /// arguments (`args`) and the contents of the `OVER` clause: |
778 | | /// |
779 | | /// 1. `PARTITION BY` |
780 | | /// 2. `ORDER BY` |
781 | | /// 3. Window frame (e.g. `ROWS 1 PRECEDING AND 1 FOLLOWING`) |
782 | | /// |
783 | | /// # Example |
784 | | /// ``` |
785 | | /// # use datafusion_expr::{Expr, BuiltInWindowFunction, col, ExprFunctionExt}; |
786 | | /// # use datafusion_expr::expr::WindowFunction; |
787 | | /// // Create FIRST_VALUE(a) OVER (PARTITION BY b ORDER BY b) |
788 | | /// let expr = Expr::WindowFunction( |
789 | | /// WindowFunction::new(BuiltInWindowFunction::FirstValue, vec![col("a")]) |
790 | | /// ) |
791 | | /// .partition_by(vec![col("b")]) |
792 | | /// .order_by(vec![col("b").sort(true, true)]) |
793 | | /// .build() |
794 | | /// .unwrap(); |
795 | | /// ``` |
796 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
797 | | pub struct WindowFunction { |
798 | | /// The window function to call |
799 | | pub fun: WindowFunctionDefinition, |
800 | | /// List of expressions to feed to the function as arguments |
801 | | pub args: Vec<Expr>, |
802 | | /// List of partition by expressions |
803 | | pub partition_by: Vec<Expr>, |
804 | | /// List of order by expressions |
805 | | pub order_by: Vec<Sort>, |
806 | | /// Window frame |
807 | | pub window_frame: WindowFrame, |
808 | | /// Specifies how NULL values are treated: ignore or respect (`None` if not specified) |
809 | | pub null_treatment: Option<NullTreatment>, |
810 | | } |
811 | | |
812 | | impl WindowFunction { |
813 | | /// Create a new Window expression with the specified arguments and an
814 | | /// empty `OVER` clause |
815 | 0 | pub fn new(fun: impl Into<WindowFunctionDefinition>, args: Vec<Expr>) -> Self { |
816 | 0 | Self { |
817 | 0 | fun: fun.into(), |
818 | 0 | args, |
819 | 0 | partition_by: Vec::default(), |
820 | 0 | order_by: Vec::default(), |
821 | 0 | window_frame: WindowFrame::new(None), |
822 | 0 | null_treatment: None, |
823 | 0 | } |
824 | 0 | } |
825 | | } |
826 | | |
827 | | /// Find DataFusion's built-in window function by name. |
828 | 0 | pub fn find_df_window_func(name: &str) -> Option<WindowFunctionDefinition> { |
829 | 0 | let name = name.to_lowercase(); |
830 | | // Code paths for window functions leveraging ordinary aggregators and
831 | | // built-in window functions are quite different, and the same function
832 | | // may have different implementations for these cases. Only built-in
833 | | // window functions are searched here; `None` is returned when the name
834 | | // does not match one of them.
835 | 0 | if let Ok(built_in_function) = |
836 | 0 | built_in_window_function::BuiltInWindowFunction::from_str(name.as_str()) |
837 | | { |
838 | 0 | Some(WindowFunctionDefinition::BuiltInWindowFunction( |
839 | 0 | built_in_function, |
840 | 0 | )) |
841 | | } else { |
842 | 0 | None |
843 | | } |
844 | 0 | } |
845 | | |
846 | | /// EXISTS expression |
847 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
848 | | pub struct Exists { |
849 | | /// subquery that will produce a single column of data |
850 | | pub subquery: Subquery, |
851 | | /// Whether the expression is negated |
852 | | pub negated: bool, |
853 | | } |
854 | | |
855 | | impl Exists { |
856 | | /// Create a new Exists expression.
857 | 0 | pub fn new(subquery: Subquery, negated: bool) -> Self { |
858 | 0 | Self { subquery, negated } |
859 | 0 | } |
860 | | } |
861 | | |
862 | | /// User Defined Aggregate Function |
863 | | /// |
864 | | /// See [`udaf::AggregateUDF`] for more information. |
865 | | #[derive(Clone, PartialEq, Eq, Hash, Debug)] |
866 | | pub struct AggregateUDF { |
867 | | /// The function |
868 | | pub fun: Arc<udaf::AggregateUDF>, |
869 | | /// List of expressions to feed to the functions as arguments |
870 | | pub args: Vec<Expr>, |
871 | | /// Optional filter |
872 | | pub filter: Option<Box<Expr>>, |
873 | | /// Optional ORDER BY applied prior to aggregating |
874 | | pub order_by: Option<Vec<Expr>>, |
875 | | } |
876 | | |
877 | | impl AggregateUDF { |
878 | | /// Create a new AggregateUDF expression |
879 | 0 | pub fn new( |
880 | 0 | fun: Arc<udaf::AggregateUDF>, |
881 | 0 | args: Vec<Expr>, |
882 | 0 | filter: Option<Box<Expr>>, |
883 | 0 | order_by: Option<Vec<Expr>>, |
884 | 0 | ) -> Self { |
885 | 0 | Self { |
886 | 0 | fun, |
887 | 0 | args, |
888 | 0 | filter, |
889 | 0 | order_by, |
890 | 0 | } |
891 | 0 | } |
892 | | } |
893 | | |
894 | | /// InList expression |
895 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
896 | | pub struct InList { |
897 | | /// The expression to compare |
898 | | pub expr: Box<Expr>, |
899 | | /// The list of values to compare against |
900 | | pub list: Vec<Expr>, |
901 | | /// Whether the expression is negated |
902 | | pub negated: bool, |
903 | | } |
904 | | |
905 | | impl InList { |
906 | | /// Create a new InList expression |
907 | 0 | pub fn new(expr: Box<Expr>, list: Vec<Expr>, negated: bool) -> Self { |
908 | 0 | Self { |
909 | 0 | expr, |
910 | 0 | list, |
911 | 0 | negated, |
912 | 0 | } |
913 | 0 | } |
914 | | } |
915 | | |
916 | | /// IN subquery |
917 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
918 | | pub struct InSubquery { |
919 | | /// The expression to compare |
920 | | pub expr: Box<Expr>, |
921 | | /// Subquery that will produce a single column of data to compare against |
922 | | pub subquery: Subquery, |
923 | | /// Whether the expression is negated |
924 | | pub negated: bool, |
925 | | } |
926 | | |
927 | | impl InSubquery { |
928 | | /// Create a new InSubquery expression |
929 | 0 | pub fn new(expr: Box<Expr>, subquery: Subquery, negated: bool) -> Self { |
930 | 0 | Self { |
931 | 0 | expr, |
932 | 0 | subquery, |
933 | 0 | negated, |
934 | 0 | } |
935 | 0 | } |
936 | | } |
937 | | |
938 | | /// Placeholder, representing bind parameter values such as `$1` or `$name`. |
939 | | /// |
940 | | /// The type of these parameters is inferred using [`Expr::infer_placeholder_types`] |
941 | | /// or can be specified directly using `PREPARE` statements. |
942 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
943 | | pub struct Placeholder { |
944 | | /// The identifier of the parameter, including the leading `$` (e.g., `"$1"` or `"$foo"`)
945 | | pub id: String, |
946 | | /// The type the parameter will be filled in with |
947 | | pub data_type: Option<DataType>, |
948 | | } |
949 | | |
950 | | impl Placeholder { |
951 | | /// Create a new Placeholder expression |
952 | 0 | pub fn new(id: String, data_type: Option<DataType>) -> Self { |
953 | 0 | Self { id, data_type } |
954 | 0 | } |
955 | | } |
956 | | |
957 | | /// Grouping sets |
958 | | /// |
959 | | /// See <https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS> |
960 | | /// for Postgres definition. |
961 | | /// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html> |
962 | | /// for Apache Spark definition. |
963 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
964 | | pub enum GroupingSet { |
965 | | /// Rollup grouping sets |
966 | | Rollup(Vec<Expr>), |
967 | | /// Cube grouping sets |
968 | | Cube(Vec<Expr>), |
969 | | /// User-defined grouping sets |
970 | | GroupingSets(Vec<Vec<Expr>>), |
971 | | } |
972 | | |
973 | | impl GroupingSet { |
974 | | /// Return all distinct exprs in the grouping set. For `CUBE` and `ROLLUP` this |
975 | | /// is just the underlying list of exprs. For `GROUPING SETS` we need to deduplicate
976 | | /// the exprs in the underlying sets. |
977 | 0 | pub fn distinct_expr(&self) -> Vec<&Expr> { |
978 | 0 | match self { |
979 | 0 | GroupingSet::Rollup(exprs) | GroupingSet::Cube(exprs) => { |
980 | 0 | exprs.iter().collect() |
981 | | } |
982 | 0 | GroupingSet::GroupingSets(groups) => { |
983 | 0 | let mut exprs: Vec<&Expr> = vec![]; |
984 | 0 | for exp in groups.iter().flatten() { |
985 | 0 | if !exprs.contains(&exp) { |
986 | 0 | exprs.push(exp); |
987 | 0 | } |
988 | | } |
989 | 0 | exprs |
990 | | } |
991 | | } |
992 | 0 | } |
993 | | } |
994 | | |
995 | | /// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and BigQuery `EXCEPT`.
996 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)] |
997 | | pub struct WildcardOptions { |
998 | | /// `[ILIKE...]`. |
999 | | /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters> |
1000 | | pub ilike: Option<IlikeSelectItem>, |
1001 | | /// `[EXCLUDE...]`. |
1002 | | /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters> |
1003 | | pub exclude: Option<ExcludeSelectItem>, |
1004 | | /// `[EXCEPT...]`. |
1005 | | /// BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_except> |
1006 | | /// Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#except> |
1007 | | pub except: Option<ExceptSelectItem>, |
1008 | | /// `[REPLACE]` |
1009 | | /// BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace> |
1010 | | /// Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#replace> |
1011 | | /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters> |
1012 | | pub replace: Option<PlannedReplaceSelectItem>, |
1013 | | /// `[RENAME ...]`. |
1014 | | /// Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters> |
1015 | | pub rename: Option<RenameSelectItem>, |
1016 | | } |
1017 | | |
1018 | | impl WildcardOptions { |
1019 | 0 | pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self { |
1020 | 0 | WildcardOptions { |
1021 | 0 | ilike: self.ilike, |
1022 | 0 | exclude: self.exclude, |
1023 | 0 | except: self.except, |
1024 | 0 | replace: Some(replace), |
1025 | 0 | rename: self.rename, |
1026 | 0 | } |
1027 | 0 | } |
1028 | | } |
1029 | | |
1030 | | impl Display for WildcardOptions { |
1031 | 0 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { |
1032 | 0 | if let Some(ilike) = &self.ilike { |
1033 | 0 | write!(f, " {ilike}")?; |
1034 | 0 | } |
1035 | 0 | if let Some(exclude) = &self.exclude { |
1036 | 0 | write!(f, " {exclude}")?; |
1037 | 0 | } |
1038 | 0 | if let Some(except) = &self.except { |
1039 | 0 | write!(f, " {except}")?; |
1040 | 0 | } |
1041 | 0 | if let Some(replace) = &self.replace { |
1042 | 0 | write!(f, " {replace}")?; |
1043 | 0 | } |
1044 | 0 | if let Some(rename) = &self.rename { |
1045 | 0 | write!(f, " {rename}")?; |
1046 | 0 | } |
1047 | 0 | Ok(()) |
1048 | 0 | } |
1049 | | } |
1050 | | |
1051 | | /// The planned expressions for `REPLACE` |
1052 | | #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)] |
1053 | | pub struct PlannedReplaceSelectItem { |
1054 | | /// The original ast nodes |
1055 | | pub items: Vec<ReplaceSelectElement>, |
1056 | | /// The expression planned from the ast nodes. They will be used when expanding the wildcard. |
1057 | | pub planned_expressions: Vec<Expr>, |
1058 | | } |
1059 | | |
1060 | | impl Display for PlannedReplaceSelectItem { |
1061 | 0 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { |
1062 | 0 | write!(f, "REPLACE")?; |
1063 | 0 | write!(f, " ({})", display_comma_separated(&self.items))?; |
1064 | 0 | Ok(()) |
1065 | 0 | } |
1066 | | } |
1067 | | |
1068 | | impl PlannedReplaceSelectItem { |
1069 | 0 | pub fn items(&self) -> &[ReplaceSelectElement] { |
1070 | 0 | &self.items |
1071 | 0 | } |
1072 | | |
1073 | 0 | pub fn expressions(&self) -> &[Expr] { |
1074 | 0 | &self.planned_expressions |
1075 | 0 | } |
1076 | | } |
1077 | | |
1078 | | impl Expr { |
1079 | | #[deprecated(since = "40.0.0", note = "use schema_name instead")] |
1080 | 0 | pub fn display_name(&self) -> Result<String> { |
1081 | 0 | Ok(self.schema_name().to_string()) |
1082 | 0 | } |
1083 | | |
1084 | | /// The name of the column (field) that this `Expr` will produce. |
1085 | | /// |
1086 | | /// For example, for a projection (e.g. `SELECT <expr>`) the resulting arrow |
1087 | | /// [`Schema`] will have a field with this name. |
1088 | | /// |
1089 | | /// Note that the resulting string is subtly different from the `Display`
1090 | | /// representation for certain `Expr`. Some differences: |
1091 | | /// |
1092 | | /// 1. [`Expr::Alias`], which shows only the alias itself |
1093 | | /// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression |
1094 | | /// |
1095 | | /// # Example |
1096 | | /// ``` |
1097 | | /// # use datafusion_expr::{col, lit}; |
1098 | | /// let expr = col("foo").eq(lit(42)); |
1099 | | /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string()); |
1100 | | /// |
1101 | | /// let expr = col("foo").alias("bar").eq(lit(11)); |
1102 | | /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string()); |
1103 | | /// ``` |
1104 | | /// |
1105 | | /// [`Schema`]: arrow::datatypes::Schema |
1106 | 0 | pub fn schema_name(&self) -> impl Display + '_ { |
1107 | 0 | SchemaDisplay(self) |
1108 | 0 | } |
1109 | | |
1110 | | /// Returns the qualifier and the schema name of this expression. |
1111 | | /// |
1112 | | /// Used when the expression forms the output field of a certain plan. |
1113 | | /// The result is the field's qualifier and field name in the plan's |
1114 | | /// output schema. We can use this qualified name to reference the field. |
1115 | 0 | pub fn qualified_name(&self) -> (Option<TableReference>, String) { |
1116 | 0 | match self { |
1117 | 0 | Expr::Column(Column { relation, name }) => (relation.clone(), name.clone()), |
1118 | 0 | Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()), |
1119 | 0 | _ => (None, self.schema_name().to_string()), |
1120 | | } |
1121 | 0 | } |
1122 | | |
1123 | | /// Returns a full and complete string representation of this expression. |
1124 | | #[deprecated(note = "use format! instead")] |
1125 | 0 | pub fn canonical_name(&self) -> String { |
1126 | 0 | format!("{self}") |
1127 | 0 | } |
1128 | | |
1129 | | /// Return String representation of the variant represented by `self` |
1130 | | /// Useful for non-rust based bindings |
1131 | 0 | pub fn variant_name(&self) -> &str { |
1132 | 0 | match self { |
1133 | 0 | Expr::AggregateFunction { .. } => "AggregateFunction", |
1134 | 0 | Expr::Alias(..) => "Alias", |
1135 | 0 | Expr::Between { .. } => "Between", |
1136 | 0 | Expr::BinaryExpr { .. } => "BinaryExpr", |
1137 | 0 | Expr::Case { .. } => "Case", |
1138 | 0 | Expr::Cast { .. } => "Cast", |
1139 | 0 | Expr::Column(..) => "Column", |
1140 | 0 | Expr::OuterReferenceColumn(_, _) => "Outer", |
1141 | 0 | Expr::Exists { .. } => "Exists", |
1142 | 0 | Expr::GroupingSet(..) => "GroupingSet", |
1143 | 0 | Expr::InList { .. } => "InList", |
1144 | 0 | Expr::InSubquery(..) => "InSubquery", |
1145 | 0 | Expr::IsNotNull(..) => "IsNotNull", |
1146 | 0 | Expr::IsNull(..) => "IsNull", |
1147 | 0 | Expr::Like { .. } => "Like", |
1148 | 0 | Expr::SimilarTo { .. } => "RLike", |
1149 | 0 | Expr::IsTrue(..) => "IsTrue", |
1150 | 0 | Expr::IsFalse(..) => "IsFalse", |
1151 | 0 | Expr::IsUnknown(..) => "IsUnknown", |
1152 | 0 | Expr::IsNotTrue(..) => "IsNotTrue", |
1153 | 0 | Expr::IsNotFalse(..) => "IsNotFalse", |
1154 | 0 | Expr::IsNotUnknown(..) => "IsNotUnknown", |
1155 | 0 | Expr::Literal(..) => "Literal", |
1156 | 0 | Expr::Negative(..) => "Negative", |
1157 | 0 | Expr::Not(..) => "Not", |
1158 | 0 | Expr::Placeholder(_) => "Placeholder", |
1159 | 0 | Expr::ScalarFunction(..) => "ScalarFunction", |
1160 | 0 | Expr::ScalarSubquery { .. } => "ScalarSubquery", |
1161 | 0 | Expr::ScalarVariable(..) => "ScalarVariable", |
1162 | 0 | Expr::TryCast { .. } => "TryCast", |
1163 | 0 | Expr::WindowFunction { .. } => "WindowFunction", |
1164 | 0 | Expr::Wildcard { .. } => "Wildcard", |
1165 | 0 | Expr::Unnest { .. } => "Unnest", |
1166 | | } |
1167 | 0 | } |
1168 | | |
1169 | | /// Return `self == other` |
1170 | 0 | pub fn eq(self, other: Expr) -> Expr { |
1171 | 0 | binary_expr(self, Operator::Eq, other) |
1172 | 0 | } |
1173 | | |
1174 | | /// Return `self != other` |
1175 | 0 | pub fn not_eq(self, other: Expr) -> Expr { |
1176 | 0 | binary_expr(self, Operator::NotEq, other) |
1177 | 0 | } |
1178 | | |
1179 | | /// Return `self > other` |
1180 | 0 | pub fn gt(self, other: Expr) -> Expr { |
1181 | 0 | binary_expr(self, Operator::Gt, other) |
1182 | 0 | } |
1183 | | |
1184 | | /// Return `self >= other` |
1185 | 0 | pub fn gt_eq(self, other: Expr) -> Expr { |
1186 | 0 | binary_expr(self, Operator::GtEq, other) |
1187 | 0 | } |
1188 | | |
1189 | | /// Return `self < other` |
1190 | 0 | pub fn lt(self, other: Expr) -> Expr { |
1191 | 0 | binary_expr(self, Operator::Lt, other) |
1192 | 0 | } |
1193 | | |
1194 | | /// Return `self <= other` |
1195 | 0 | pub fn lt_eq(self, other: Expr) -> Expr { |
1196 | 0 | binary_expr(self, Operator::LtEq, other) |
1197 | 0 | } |
1198 | | |
1199 | | /// Return `self && other` |
1200 | 0 | pub fn and(self, other: Expr) -> Expr { |
1201 | 0 | binary_expr(self, Operator::And, other) |
1202 | 0 | } |
1203 | | |
1204 | | /// Return `self || other` |
1205 | 0 | pub fn or(self, other: Expr) -> Expr { |
1206 | 0 | binary_expr(self, Operator::Or, other) |
1207 | 0 | } |
1208 | | |
1209 | | /// Return `self LIKE other` |
1210 | 0 | pub fn like(self, other: Expr) -> Expr { |
1211 | 0 | Expr::Like(Like::new( |
1212 | 0 | false, |
1213 | 0 | Box::new(self), |
1214 | 0 | Box::new(other), |
1215 | 0 | None, |
1216 | 0 | false, |
1217 | 0 | )) |
1218 | 0 | } |
1219 | | |
1220 | | /// Return `self NOT LIKE other` |
1221 | 0 | pub fn not_like(self, other: Expr) -> Expr { |
1222 | 0 | Expr::Like(Like::new( |
1223 | 0 | true, |
1224 | 0 | Box::new(self), |
1225 | 0 | Box::new(other), |
1226 | 0 | None, |
1227 | 0 | false, |
1228 | 0 | )) |
1229 | 0 | } |
1230 | | |
1231 | | /// Return `self ILIKE other` |
1232 | 0 | pub fn ilike(self, other: Expr) -> Expr { |
1233 | 0 | Expr::Like(Like::new( |
1234 | 0 | false, |
1235 | 0 | Box::new(self), |
1236 | 0 | Box::new(other), |
1237 | 0 | None, |
1238 | 0 | true, |
1239 | 0 | )) |
1240 | 0 | } |
1241 | | |
1242 | | /// Return `self NOT ILIKE other` |
1243 | 0 | pub fn not_ilike(self, other: Expr) -> Expr { |
1244 | 0 | Expr::Like(Like::new(true, Box::new(self), Box::new(other), None, true)) |
1245 | 0 | } |
1246 | | |
1247 | | /// Return the name to use for the specific Expr |
1248 | 0 | pub fn name_for_alias(&self) -> Result<String> { |
1249 | 0 | Ok(self.schema_name().to_string()) |
1250 | 0 | } |
1251 | | |
1252 | | /// Ensure `expr` has the same name as `original_name` by adding an
1253 | | /// alias if necessary. |
1254 | 0 | pub fn alias_if_changed(self, original_name: String) -> Result<Expr> { |
1255 | 0 | let new_name = self.name_for_alias()?; |
1256 | 0 | if new_name == original_name { |
1257 | 0 | return Ok(self); |
1258 | 0 | } |
1259 | 0 |
|
1260 | 0 | Ok(self.alias(original_name)) |
1261 | 0 | } |
1262 | | |
1263 | | /// Return `self AS name` alias expression |
1264 | 0 | pub fn alias(self, name: impl Into<String>) -> Expr { |
1265 | 0 | Expr::Alias(Alias::new(self, None::<&str>, name.into())) |
1266 | 0 | } |
1267 | | |
1268 | | /// Return `self AS name` alias expression with a specific qualifier |
1269 | 0 | pub fn alias_qualified( |
1270 | 0 | self, |
1271 | 0 | relation: Option<impl Into<TableReference>>, |
1272 | 0 | name: impl Into<String>, |
1273 | 0 | ) -> Expr { |
1274 | 0 | Expr::Alias(Alias::new(self, relation, name.into())) |
1275 | 0 | } |
1276 | | |
1277 | | /// Remove an alias from an expression if one exists. |
1278 | | /// |
1279 | | /// If the expression is not an alias, the expression is returned unchanged. |
1280 | | /// This method does not remove aliases from nested expressions. |
1281 | | /// |
1282 | | /// # Example |
1283 | | /// ``` |
1284 | | /// # use datafusion_expr::col; |
1285 | | /// // `foo as "bar"` is unaliased to `foo` |
1286 | | /// let expr = col("foo").alias("bar"); |
1287 | | /// assert_eq!(expr.unalias(), col("foo")); |
1288 | | /// |
1289 | | /// // `foo as "bar" + baz` is not unaliased |
1290 | | /// let expr = col("foo").alias("bar") + col("baz"); |
1291 | | /// assert_eq!(expr.clone().unalias(), expr); |
1292 | | /// |
1293 | | /// // `foo as "bar" as "baz"` is unaliased to `foo as "bar"`
1294 | | /// let expr = col("foo").alias("bar").alias("baz"); |
1295 | | /// assert_eq!(expr.unalias(), col("foo").alias("bar")); |
1296 | | /// ``` |
1297 | 0 | pub fn unalias(self) -> Expr { |
1298 | 0 | match self { |
1299 | 0 | Expr::Alias(alias) => *alias.expr, |
1300 | 0 | _ => self, |
1301 | | } |
1302 | 0 | } |
1303 | | |
1304 | | /// Recursively removes potentially multiple aliases from an expression.
1305 | | /// |
1306 | | /// This method removes nested aliases and returns [`Transformed`] |
1307 | | /// to signal if the expression was changed. |
1308 | | /// |
1309 | | /// # Example |
1310 | | /// ``` |
1311 | | /// # use datafusion_expr::col; |
1312 | | /// // `foo as "bar"` is unaliased to `foo` |
1313 | | /// let expr = col("foo").alias("bar"); |
1314 | | /// assert_eq!(expr.unalias_nested().data, col("foo")); |
1315 | | /// |
1316 | | /// // `foo as "bar" + baz` is unaliased |
1317 | | /// let expr = col("foo").alias("bar") + col("baz"); |
1318 | | /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz")); |
1319 | | /// |
1320 | | /// // `foo as "bar" as "baz"` is unaliased to `foo`
1321 | | /// let expr = col("foo").alias("bar").alias("baz"); |
1322 | | /// assert_eq!(expr.unalias_nested().data, col("foo")); |
1323 | | /// ``` |
1324 | 0 | pub fn unalias_nested(self) -> Transformed<Expr> { |
1325 | 0 | self.transform_down_up( |
1326 | 0 | |expr| { |
1327 | | // f_down: skip subqueries. Check in f_down to avoid recursing into them |
1328 | 0 | let recursion = if matches!( |
1329 | 0 | expr, |
1330 | | Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_) |
1331 | | ) { |
1332 | | // subqueries could contain aliases so don't recurse into those |
1333 | 0 | TreeNodeRecursion::Jump |
1334 | | } else { |
1335 | 0 | TreeNodeRecursion::Continue |
1336 | | }; |
1337 | 0 | Ok(Transformed::new(expr, false, recursion)) |
1338 | 0 | }, |
1339 | 0 | |expr| { |
1340 | | // f_up: unalias on up so we can remove nested aliases like |
1341 | | // `(x as foo) as bar` |
1342 | 0 | if let Expr::Alias(Alias { expr, .. }) = expr { |
1343 | 0 | Ok(Transformed::yes(*expr)) |
1344 | | } else { |
1345 | 0 | Ok(Transformed::no(expr)) |
1346 | | } |
1347 | 0 | }, |
1348 | 0 | ) |
1349 | 0 | // unreachable code: internal closure doesn't return err |
1350 | 0 | .unwrap() |
1351 | 0 | } |
1352 | | |
1353 | | /// Return `self IN <list>` if `negated` is false, otherwise |
1354 | | /// return `self NOT IN <list>`.
1355 | 0 | pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr { |
1356 | 0 | Expr::InList(InList::new(Box::new(self), list, negated)) |
1357 | 0 | } |
1358 | | |
1359 | | /// Return `IsNull(Box(self))`
1360 | 0 | pub fn is_null(self) -> Expr { |
1361 | 0 | Expr::IsNull(Box::new(self)) |
1362 | 0 | } |
1363 | | |
1364 | | /// Return `IsNotNull(Box(self))`
1365 | 0 | pub fn is_not_null(self) -> Expr { |
1366 | 0 | Expr::IsNotNull(Box::new(self)) |
1367 | 0 | } |
1368 | | |
1369 | | /// Create a sort configuration from an existing expression. |
1370 | | /// |
1371 | | /// ``` |
1372 | | /// # use datafusion_expr::col; |
1373 | | /// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST |
1374 | | /// ``` |
1375 | 0 | pub fn sort(self, asc: bool, nulls_first: bool) -> Sort { |
1376 | 0 | Sort::new(self, asc, nulls_first) |
1377 | 0 | } |
1378 | | |
1379 | | /// Return `IsTrue(Box(self))` |
1380 | 0 | pub fn is_true(self) -> Expr { |
1381 | 0 | Expr::IsTrue(Box::new(self)) |
1382 | 0 | } |
1383 | | |
1384 | | /// Return `IsNotTrue(Box(self))` |
1385 | 0 | pub fn is_not_true(self) -> Expr { |
1386 | 0 | Expr::IsNotTrue(Box::new(self)) |
1387 | 0 | } |
1388 | | |
1389 | | /// Return `IsFalse(Box(self))` |
1390 | 0 | pub fn is_false(self) -> Expr { |
1391 | 0 | Expr::IsFalse(Box::new(self)) |
1392 | 0 | } |
1393 | | |
1394 | | /// Return `IsNotFalse(Box(self))` |
1395 | 0 | pub fn is_not_false(self) -> Expr { |
1396 | 0 | Expr::IsNotFalse(Box::new(self)) |
1397 | 0 | } |
1398 | | |
1399 | | /// Return `IsUnknown(Box(self))` |
1400 | 0 | pub fn is_unknown(self) -> Expr { |
1401 | 0 | Expr::IsUnknown(Box::new(self)) |
1402 | 0 | } |
1403 | | |
1404 | | /// Return `IsNotUnknown(Box(self))` |
1405 | 0 | pub fn is_not_unknown(self) -> Expr { |
1406 | 0 | Expr::IsNotUnknown(Box::new(self)) |
1407 | 0 | } |
1408 | | |
1409 | | /// return `self BETWEEN low AND high` |
1410 | 0 | pub fn between(self, low: Expr, high: Expr) -> Expr { |
1411 | 0 | Expr::Between(Between::new( |
1412 | 0 | Box::new(self), |
1413 | 0 | false, |
1414 | 0 | Box::new(low), |
1415 | 0 | Box::new(high), |
1416 | 0 | )) |
1417 | 0 | } |
1418 | | |
1419 | | /// return `self NOT BETWEEN low AND high` |
1420 | 0 | pub fn not_between(self, low: Expr, high: Expr) -> Expr { |
1421 | 0 | Expr::Between(Between::new( |
1422 | 0 | Box::new(self), |
1423 | 0 | true, |
1424 | 0 | Box::new(low), |
1425 | 0 | Box::new(high), |
1426 | 0 | )) |
1427 | 0 | } |
1428 | | |
1429 | | #[deprecated(since = "39.0.0", note = "use try_as_col instead")] |
1430 | 0 | pub fn try_into_col(&self) -> Result<Column> { |
1431 | 0 | match self { |
1432 | 0 | Expr::Column(it) => Ok(it.clone()), |
1433 | 0 | _ => plan_err!("Could not coerce '{self}' into Column!"), |
1434 | | } |
1435 | 0 | } |
1436 | | |
1437 | | /// Return a reference to the inner `Column` if any |
1438 | | /// |
1439 | | /// returns `None` if the expression is not a `Column` |
1440 | | /// |
1441 | | /// Note: None may be returned for expressions that are not `Column` but |
1442 | | /// are convertible to `Column` such as `Cast` expressions. |
1443 | | /// |
1444 | | /// # Example
1445 | | /// ``` |
1446 | | /// # use datafusion_common::Column; |
1447 | | /// use datafusion_expr::{col, Expr}; |
1448 | | /// let expr = col("foo"); |
1449 | | /// assert_eq!(expr.try_as_col(), Some(&Column::from("foo"))); |
1450 | | /// |
1451 | | /// let expr = col("foo").alias("bar"); |
1452 | | /// assert_eq!(expr.try_as_col(), None); |
1453 | | /// ``` |
1454 | 0 | pub fn try_as_col(&self) -> Option<&Column> { |
1455 | 0 | if let Expr::Column(it) = self { |
1456 | 0 | Some(it) |
1457 | | } else { |
1458 | 0 | None |
1459 | | } |
1460 | 0 | } |
1461 | | |
1462 | | /// Returns the inner `Column` if any. This is a specialized version of |
1463 | | /// [`Self::try_as_col`] that takes Cast expressions into account when the
1464 | | /// expression is used as an `ON` condition for joins.
1465 | | /// |
1466 | | /// Call this method when you are sure that the expression is a `Column`
1467 | | /// or a `Cast` expression that wraps a `Column`. |
1468 | 0 | pub fn get_as_join_column(&self) -> Option<&Column> { |
1469 | 0 | match self { |
1470 | 0 | Expr::Column(c) => Some(c), |
1471 | 0 | Expr::Cast(Cast { expr, .. }) => match &**expr { |
1472 | 0 | Expr::Column(c) => Some(c), |
1473 | 0 | _ => None, |
1474 | | }, |
1475 | 0 | _ => None, |
1476 | | } |
1477 | 0 | } |
1478 | | |
1479 | | /// Return all referenced columns of this expression. |
1480 | | #[deprecated(since = "40.0.0", note = "use Expr::column_refs instead")] |
1481 | 0 | pub fn to_columns(&self) -> Result<HashSet<Column>> { |
1482 | 0 | let mut using_columns = HashSet::new(); |
1483 | 0 | expr_to_columns(self, &mut using_columns)?; |
1484 | | |
1485 | 0 | Ok(using_columns) |
1486 | 0 | } |
1487 | | |
1488 | | /// Return all references to columns in this expression. |
1489 | | /// |
1490 | | /// # Example |
1491 | | /// ``` |
1492 | | /// # use std::collections::HashSet; |
1493 | | /// # use datafusion_common::Column; |
1494 | | /// # use datafusion_expr::col; |
1495 | | /// // For an expression `a + (b * a)` |
1496 | | /// let expr = col("a") + (col("b") * col("a")); |
1497 | | /// let refs = expr.column_refs(); |
1498 | | /// // refs contains "a" and "b" |
1499 | | /// assert_eq!(refs.len(), 2); |
1500 | | /// assert!(refs.contains(&Column::new_unqualified("a"))); |
1501 | | /// assert!(refs.contains(&Column::new_unqualified("b"))); |
1502 | | /// ``` |
1503 | 0 | pub fn column_refs(&self) -> HashSet<&Column> { |
1504 | 0 | let mut using_columns = HashSet::new(); |
1505 | 0 | self.add_column_refs(&mut using_columns); |
1506 | 0 | using_columns |
1507 | 0 | } |
1508 | | |
1509 | | /// Adds references to all columns in this expression to the set |
1510 | | /// |
1511 | | /// See [`Self::column_refs`] for details |
1512 | 0 | pub fn add_column_refs<'a>(&'a self, set: &mut HashSet<&'a Column>) { |
1513 | 0 | self.apply(|expr| { |
1514 | 0 | if let Expr::Column(col) = expr { |
1515 | 0 | set.insert(col); |
1516 | 0 | } |
1517 | 0 | Ok(TreeNodeRecursion::Continue) |
1518 | 0 | }) |
1519 | 0 | .expect("traversal is infallible"); |
1520 | 0 | } |
1521 | | |
1522 | | /// Return all references to columns and their occurrence counts in the expression. |
1523 | | /// |
1524 | | /// # Example |
1525 | | /// ``` |
1526 | | /// # use std::collections::HashMap; |
1527 | | /// # use datafusion_common::Column; |
1528 | | /// # use datafusion_expr::col; |
1529 | | /// // For an expression `a + (b * a)` |
1530 | | /// let expr = col("a") + (col("b") * col("a")); |
1531 | | /// let mut refs = expr.column_refs_counts(); |
1532 | | /// // refs contains "a" and "b" |
1533 | | /// assert_eq!(refs.len(), 2); |
1534 | | /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2); |
1535 | | /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1); |
1536 | | /// ``` |
1537 | 0 | pub fn column_refs_counts(&self) -> HashMap<&Column, usize> { |
1538 | 0 | let mut map = HashMap::new(); |
1539 | 0 | self.add_column_ref_counts(&mut map); |
1540 | 0 | map |
1541 | 0 | } |
1542 | | |
1543 | | /// Adds references to all columns and their occurrence counts in the expression to |
1544 | | /// the map. |
1545 | | /// |
1546 | | /// See [`Self::column_refs_counts`] for details |
1547 | 0 | pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) { |
1548 | 0 | self.apply(|expr| { |
1549 | 0 | if let Expr::Column(col) = expr { |
1550 | 0 | *map.entry(col).or_default() += 1; |
1551 | 0 | } |
1552 | 0 | Ok(TreeNodeRecursion::Continue) |
1553 | 0 | }) |
1554 | 0 | .expect("traversal is infallible"); |
1555 | 0 | } |
1556 | | |
1557 | | /// Returns true if there are any column references in this Expr |
1558 | 0 | pub fn any_column_refs(&self) -> bool { |
1559 | 0 | self.exists(|expr| Ok(matches!(expr, Expr::Column(_)))) |
1560 | 0 | .unwrap() |
1561 | 0 | } |
1562 | | |
1563 | | /// Return true when the expression contains outer reference (correlated) expressions.
1564 | 0 | pub fn contains_outer(&self) -> bool { |
1565 | 0 | self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { .. }))) |
1566 | 0 | .unwrap() |
1567 | 0 | } |
1568 | | |
1569 | | /// Returns true if the expression node is volatile, i.e. whether it can return |
1570 | | /// different results when evaluated multiple times with the same input. |
1571 | | /// Note: unlike [`Self::is_volatile`], this function does not consider inputs: |
1572 | | /// - `rand()` returns `true`, |
1573 | | /// - `a + rand()` returns `false` |
1574 | 0 | pub fn is_volatile_node(&self) -> bool { |
1575 | 0 | matches!(self, Expr::ScalarFunction(func) if func.func.signature().volatility == Volatility::Volatile) |
1576 | 0 | } |
1577 | | |
1578 | | /// Returns true if the expression is volatile, i.e. whether it can return different |
1579 | | /// results when evaluated multiple times with the same input. |
1580 | 0 | pub fn is_volatile(&self) -> Result<bool> { |
1581 | 0 | self.exists(|expr| Ok(expr.is_volatile_node())) |
1582 | 0 | } |
1583 | | |
1584 | | /// Recursively find all [`Expr::Placeholder`] expressions, and |
1585 | | /// infer their [`DataType`] from the context of their use.
1586 | | /// |
1587 | | /// For example, given an expression like `<int32> = $0`, this will infer `$0` to
1588 | | /// have type `int32`. |
1589 | 0 | pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<Expr> { |
1590 | 0 | self.transform(|mut expr| { |
1591 | | // Default to assuming the arguments are the same type |
1592 | 0 | if let Expr::BinaryExpr(BinaryExpr { left, op: _, right }) = &mut expr { |
1593 | 0 | rewrite_placeholder(left.as_mut(), right.as_ref(), schema)?; |
1594 | 0 | rewrite_placeholder(right.as_mut(), left.as_ref(), schema)?; |
1595 | 0 | }; |
1596 | | if let Expr::Between(Between { |
1597 | 0 | expr, |
1598 | 0 | negated: _, |
1599 | 0 | low, |
1600 | 0 | high, |
1601 | 0 | }) = &mut expr |
1602 | | { |
1603 | 0 | rewrite_placeholder(low.as_mut(), expr.as_ref(), schema)?; |
1604 | 0 | rewrite_placeholder(high.as_mut(), expr.as_ref(), schema)?; |
1605 | 0 | } |
1606 | 0 | Ok(Transformed::yes(expr)) |
1607 | 0 | }) |
1608 | 0 | .data() |
1609 | 0 | } |
1610 | | |
1611 | | /// Returns true if some of this `Expr`'s subexpressions may not be evaluated
1612 | | /// and thus any side effects (like divide by zero) may not be encountered |
1613 | 0 | pub fn short_circuits(&self) -> bool { |
1614 | 0 | match self { |
1615 | 0 | Expr::ScalarFunction(ScalarFunction { func, .. }) => func.short_circuits(), |
1616 | 0 | Expr::BinaryExpr(BinaryExpr { op, .. }) => { |
1617 | 0 | matches!(op, Operator::And | Operator::Or) |
1618 | | } |
1619 | 0 | Expr::Case { .. } => true, |
1620 | | // Use explicit pattern match instead of a default |
1621 | | // implementation, so that in the future if someone adds |
1622 | | // new Expr types, they will check here as well |
1623 | | Expr::AggregateFunction(..) |
1624 | | | Expr::Alias(..) |
1625 | | | Expr::Between(..) |
1626 | | | Expr::Cast(..) |
1627 | | | Expr::Column(..) |
1628 | | | Expr::Exists(..) |
1629 | | | Expr::GroupingSet(..) |
1630 | | | Expr::InList(..) |
1631 | | | Expr::InSubquery(..) |
1632 | | | Expr::IsFalse(..) |
1633 | | | Expr::IsNotFalse(..) |
1634 | | | Expr::IsNotNull(..) |
1635 | | | Expr::IsNotTrue(..) |
1636 | | | Expr::IsNotUnknown(..) |
1637 | | | Expr::IsNull(..) |
1638 | | | Expr::IsTrue(..) |
1639 | | | Expr::IsUnknown(..) |
1640 | | | Expr::Like(..) |
1641 | | | Expr::ScalarSubquery(..) |
1642 | | | Expr::ScalarVariable(_, _) |
1643 | | | Expr::SimilarTo(..) |
1644 | | | Expr::Not(..) |
1645 | | | Expr::Negative(..) |
1646 | | | Expr::OuterReferenceColumn(_, _) |
1647 | | | Expr::TryCast(..) |
1648 | | | Expr::Unnest(..) |
1649 | | | Expr::Wildcard { .. } |
1650 | | | Expr::WindowFunction(..) |
1651 | | | Expr::Literal(..) |
1652 | 0 | | Expr::Placeholder(..) => false, |
1653 | | } |
1654 | 0 | } |
1655 | | |
1656 | | /// Hashes the direct content of an `Expr` without recursing into its children. |
1657 | | /// |
1658 | | /// This method is useful to incrementally compute hashes, such as in |
1659 | | /// `CommonSubexprEliminate` which builds a deep hash of a node and its descendants |
1660 | | /// during the bottom-up phase of the first traversal and so avoid computing the hash |
1661 | | /// of the node and then the hash of its descendants separately. |
1662 | | /// |
1663 | | /// If a node doesn't have any children then this method is similar to `.hash()`, but |
1664 | | /// not necessarily returns the same value. |
1665 | | /// |
1666 | | /// As it is pretty easy to forget changing this method when `Expr` changes the |
1667 | | /// implementation doesn't use wildcard patterns (`..`, `_`) to catch changes |
1668 | | /// compile time. |
1669 | 0 | pub fn hash_node<H: Hasher>(&self, hasher: &mut H) { |
1670 | 0 | mem::discriminant(self).hash(hasher); |
1671 | 0 | match self { |
1672 | | Expr::Alias(Alias { |
1673 | 0 | expr: _expr, |
1674 | 0 | relation, |
1675 | 0 | name, |
1676 | 0 | }) => { |
1677 | 0 | relation.hash(hasher); |
1678 | 0 | name.hash(hasher); |
1679 | 0 | } |
1680 | 0 | Expr::Column(column) => { |
1681 | 0 | column.hash(hasher); |
1682 | 0 | } |
1683 | 0 | Expr::ScalarVariable(data_type, name) => { |
1684 | 0 | data_type.hash(hasher); |
1685 | 0 | name.hash(hasher); |
1686 | 0 | } |
1687 | 0 | Expr::Literal(scalar_value) => { |
1688 | 0 | scalar_value.hash(hasher); |
1689 | 0 | } |
1690 | | Expr::BinaryExpr(BinaryExpr { |
1691 | 0 | left: _left, |
1692 | 0 | op, |
1693 | 0 | right: _right, |
1694 | 0 | }) => { |
1695 | 0 | op.hash(hasher); |
1696 | 0 | } |
1697 | | Expr::Like(Like { |
1698 | 0 | negated, |
1699 | 0 | expr: _expr, |
1700 | 0 | pattern: _pattern, |
1701 | 0 | escape_char, |
1702 | 0 | case_insensitive, |
1703 | | }) |
1704 | | | Expr::SimilarTo(Like { |
1705 | 0 | negated, |
1706 | 0 | expr: _expr, |
1707 | 0 | pattern: _pattern, |
1708 | 0 | escape_char, |
1709 | 0 | case_insensitive, |
1710 | 0 | }) => { |
1711 | 0 | negated.hash(hasher); |
1712 | 0 | escape_char.hash(hasher); |
1713 | 0 | case_insensitive.hash(hasher); |
1714 | 0 | } |
1715 | 0 | Expr::Not(_expr) |
1716 | 0 | | Expr::IsNotNull(_expr) |
1717 | 0 | | Expr::IsNull(_expr) |
1718 | 0 | | Expr::IsTrue(_expr) |
1719 | 0 | | Expr::IsFalse(_expr) |
1720 | 0 | | Expr::IsUnknown(_expr) |
1721 | 0 | | Expr::IsNotTrue(_expr) |
1722 | 0 | | Expr::IsNotFalse(_expr) |
1723 | 0 | | Expr::IsNotUnknown(_expr) |
1724 | 0 | | Expr::Negative(_expr) => {} |
1725 | | Expr::Between(Between { |
1726 | 0 | expr: _expr, |
1727 | 0 | negated, |
1728 | 0 | low: _low, |
1729 | 0 | high: _high, |
1730 | 0 | }) => { |
1731 | 0 | negated.hash(hasher); |
1732 | 0 | } |
1733 | | Expr::Case(Case { |
1734 | 0 | expr: _expr, |
1735 | 0 | when_then_expr: _when_then_expr, |
1736 | 0 | else_expr: _else_expr, |
1737 | 0 | }) => {} |
1738 | | Expr::Cast(Cast { |
1739 | 0 | expr: _expr, |
1740 | 0 | data_type, |
1741 | | }) |
1742 | | | Expr::TryCast(TryCast { |
1743 | 0 | expr: _expr, |
1744 | 0 | data_type, |
1745 | 0 | }) => { |
1746 | 0 | data_type.hash(hasher); |
1747 | 0 | } |
1748 | 0 | Expr::ScalarFunction(ScalarFunction { func, args: _args }) => { |
1749 | 0 | func.hash(hasher); |
1750 | 0 | } |
1751 | | Expr::AggregateFunction(AggregateFunction { |
1752 | 0 | func, |
1753 | 0 | args: _args, |
1754 | 0 | distinct, |
1755 | 0 | filter: _filter, |
1756 | 0 | order_by: _order_by, |
1757 | 0 | null_treatment, |
1758 | 0 | }) => { |
1759 | 0 | func.hash(hasher); |
1760 | 0 | distinct.hash(hasher); |
1761 | 0 | null_treatment.hash(hasher); |
1762 | 0 | } |
1763 | | Expr::WindowFunction(WindowFunction { |
1764 | 0 | fun, |
1765 | 0 | args: _args, |
1766 | 0 | partition_by: _partition_by, |
1767 | 0 | order_by: _order_by, |
1768 | 0 | window_frame, |
1769 | 0 | null_treatment, |
1770 | 0 | }) => { |
1771 | 0 | fun.hash(hasher); |
1772 | 0 | window_frame.hash(hasher); |
1773 | 0 | null_treatment.hash(hasher); |
1774 | 0 | } |
1775 | | Expr::InList(InList { |
1776 | 0 | expr: _expr, |
1777 | 0 | list: _list, |
1778 | 0 | negated, |
1779 | 0 | }) => { |
1780 | 0 | negated.hash(hasher); |
1781 | 0 | } |
1782 | 0 | Expr::Exists(Exists { subquery, negated }) => { |
1783 | 0 | subquery.hash(hasher); |
1784 | 0 | negated.hash(hasher); |
1785 | 0 | } |
1786 | | Expr::InSubquery(InSubquery { |
1787 | 0 | expr: _expr, |
1788 | 0 | subquery, |
1789 | 0 | negated, |
1790 | 0 | }) => { |
1791 | 0 | subquery.hash(hasher); |
1792 | 0 | negated.hash(hasher); |
1793 | 0 | } |
1794 | 0 | Expr::ScalarSubquery(subquery) => { |
1795 | 0 | subquery.hash(hasher); |
1796 | 0 | } |
1797 | 0 | Expr::Wildcard { qualifier, options } => { |
1798 | 0 | qualifier.hash(hasher); |
1799 | 0 | options.hash(hasher); |
1800 | 0 | } |
1801 | 0 | Expr::GroupingSet(grouping_set) => { |
1802 | 0 | mem::discriminant(grouping_set).hash(hasher); |
1803 | 0 | match grouping_set { |
1804 | 0 | GroupingSet::Rollup(_exprs) | GroupingSet::Cube(_exprs) => {} |
1805 | 0 | GroupingSet::GroupingSets(_exprs) => {} |
1806 | | } |
1807 | | } |
1808 | 0 | Expr::Placeholder(place_holder) => { |
1809 | 0 | place_holder.hash(hasher); |
1810 | 0 | } |
1811 | 0 | Expr::OuterReferenceColumn(data_type, column) => { |
1812 | 0 | data_type.hash(hasher); |
1813 | 0 | column.hash(hasher); |
1814 | 0 | } |
1815 | 0 | Expr::Unnest(Unnest { expr: _expr }) => {} |
1816 | | }; |
1817 | 0 | } |
1818 | | } |
1819 | | |
1820 | | // modifies expr if it is a placeholder with datatype of right |
1821 | 0 | fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> { |
1822 | 0 | if let Expr::Placeholder(Placeholder { id: _, data_type }) = expr { |
1823 | 0 | if data_type.is_none() { |
1824 | 0 | let other_dt = other.get_type(schema); |
1825 | 0 | match other_dt { |
1826 | 0 | Err(e) => { |
1827 | 0 | Err(e.context(format!( |
1828 | 0 | "Can not find type of {other} needed to infer type of {expr}" |
1829 | 0 | )))?; |
1830 | | } |
1831 | 0 | Ok(dt) => { |
1832 | 0 | *data_type = Some(dt); |
1833 | 0 | } |
1834 | | } |
1835 | 0 | }; |
1836 | 0 | } |
1837 | 0 | Ok(()) |
1838 | 0 | } |
1839 | | |
/// Renders every element yielded by `$ARRAY.iter()` with its `Display`
/// implementation and joins the pieces with `", "`.
#[macro_export]
macro_rules! expr_vec_fmt {
    ( $ARRAY:expr ) => {{
        let rendered: Vec<String> =
            $ARRAY.iter().map(|item| item.to_string()).collect();
        rendered.join(", ")
    }};
}
1850 | | |
1851 | | struct SchemaDisplay<'a>(&'a Expr); |
1852 | | impl<'a> Display for SchemaDisplay<'a> { |
1853 | 0 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { |
1854 | 0 | match self.0 { |
1855 | | // The same as Display |
1856 | | Expr::Column(_) |
1857 | | | Expr::Literal(_) |
1858 | | | Expr::ScalarVariable(..) |
1859 | | | Expr::OuterReferenceColumn(..) |
1860 | | | Expr::Placeholder(_) |
1861 | 0 | | Expr::Wildcard { .. } => write!(f, "{}", self.0), |
1862 | | |
1863 | | Expr::AggregateFunction(AggregateFunction { |
1864 | 0 | func, |
1865 | 0 | args, |
1866 | 0 | distinct, |
1867 | 0 | filter, |
1868 | 0 | order_by, |
1869 | 0 | null_treatment, |
1870 | 0 | }) => { |
1871 | 0 | write!( |
1872 | 0 | f, |
1873 | 0 | "{}({}{})", |
1874 | 0 | func.name(), |
1875 | 0 | if *distinct { "DISTINCT " } else { "" }, |
1876 | 0 | schema_name_from_exprs_comma_seperated_without_space(args)? |
1877 | 0 | )?; |
1878 | | |
1879 | 0 | if let Some(null_treatment) = null_treatment { |
1880 | 0 | write!(f, " {}", null_treatment)?; |
1881 | 0 | } |
1882 | | |
1883 | 0 | if let Some(filter) = filter { |
1884 | 0 | write!(f, " FILTER (WHERE {filter})")?; |
1885 | 0 | }; |
1886 | | |
1887 | 0 | if let Some(order_by) = order_by { |
1888 | 0 | write!(f, " ORDER BY [{}]", schema_name_from_sorts(order_by)?)?; |
1889 | 0 | }; |
1890 | | |
1891 | 0 | Ok(()) |
1892 | | } |
1893 | | // expr is not shown since it is aliased |
1894 | 0 | Expr::Alias(Alias { name, .. }) => write!(f, "{name}"), |
1895 | | Expr::Between(Between { |
1896 | 0 | expr, |
1897 | 0 | negated, |
1898 | 0 | low, |
1899 | 0 | high, |
1900 | 0 | }) => { |
1901 | 0 | if *negated { |
1902 | 0 | write!( |
1903 | 0 | f, |
1904 | 0 | "{} NOT BETWEEN {} AND {}", |
1905 | 0 | SchemaDisplay(expr), |
1906 | 0 | SchemaDisplay(low), |
1907 | 0 | SchemaDisplay(high), |
1908 | 0 | ) |
1909 | | } else { |
1910 | 0 | write!( |
1911 | 0 | f, |
1912 | 0 | "{} BETWEEN {} AND {}", |
1913 | 0 | SchemaDisplay(expr), |
1914 | 0 | SchemaDisplay(low), |
1915 | 0 | SchemaDisplay(high), |
1916 | 0 | ) |
1917 | | } |
1918 | | } |
1919 | 0 | Expr::BinaryExpr(BinaryExpr { left, op, right }) => { |
1920 | 0 | write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),) |
1921 | | } |
1922 | | Expr::Case(Case { |
1923 | 0 | expr, |
1924 | 0 | when_then_expr, |
1925 | 0 | else_expr, |
1926 | 0 | }) => { |
1927 | 0 | write!(f, "CASE ")?; |
1928 | | |
1929 | 0 | if let Some(e) = expr { |
1930 | 0 | write!(f, "{} ", SchemaDisplay(e))?; |
1931 | 0 | } |
1932 | | |
1933 | 0 | for (when, then) in when_then_expr { |
1934 | 0 | write!( |
1935 | 0 | f, |
1936 | 0 | "WHEN {} THEN {} ", |
1937 | 0 | SchemaDisplay(when), |
1938 | 0 | SchemaDisplay(then), |
1939 | 0 | )?; |
1940 | | } |
1941 | | |
1942 | 0 | if let Some(e) = else_expr { |
1943 | 0 | write!(f, "ELSE {} ", SchemaDisplay(e))?; |
1944 | 0 | } |
1945 | | |
1946 | 0 | write!(f, "END") |
1947 | | } |
1948 | | // cast expr is not shown to be consistant with Postgres and Spark <https://github.com/apache/datafusion/pull/3222> |
1949 | 0 | Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => { |
1950 | 0 | write!(f, "{}", SchemaDisplay(expr)) |
1951 | | } |
1952 | | Expr::InList(InList { |
1953 | 0 | expr, |
1954 | 0 | list, |
1955 | 0 | negated, |
1956 | | }) => { |
1957 | 0 | let inlist_name = schema_name_from_exprs(list)?; |
1958 | | |
1959 | 0 | if *negated { |
1960 | 0 | write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name) |
1961 | | } else { |
1962 | 0 | write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name) |
1963 | | } |
1964 | | } |
1965 | 0 | Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"), |
1966 | 0 | Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"), |
1967 | 0 | Expr::GroupingSet(GroupingSet::Cube(exprs)) => { |
1968 | 0 | write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?) |
1969 | | } |
1970 | 0 | Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => { |
1971 | 0 | write!(f, "GROUPING SETS (")?; |
1972 | 0 | for exprs in lists_of_exprs.iter() { |
1973 | 0 | write!(f, "({})", schema_name_from_exprs(exprs)?)?; |
1974 | | } |
1975 | 0 | write!(f, ")") |
1976 | | } |
1977 | 0 | Expr::GroupingSet(GroupingSet::Rollup(exprs)) => { |
1978 | 0 | write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?) |
1979 | | } |
1980 | 0 | Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)), |
1981 | 0 | Expr::IsNotNull(expr) => { |
1982 | 0 | write!(f, "{} IS NOT NULL", SchemaDisplay(expr)) |
1983 | | } |
1984 | 0 | Expr::IsUnknown(expr) => { |
1985 | 0 | write!(f, "{} IS UNKNOWN", SchemaDisplay(expr)) |
1986 | | } |
1987 | 0 | Expr::IsNotUnknown(expr) => { |
1988 | 0 | write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr)) |
1989 | | } |
1990 | | Expr::InSubquery(InSubquery { negated: true, .. }) => { |
1991 | 0 | write!(f, "NOT IN") |
1992 | | } |
1993 | 0 | Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"), |
1994 | 0 | Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)), |
1995 | 0 | Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)), |
1996 | 0 | Expr::IsNotTrue(expr) => { |
1997 | 0 | write!(f, "{} IS NOT TRUE", SchemaDisplay(expr)) |
1998 | | } |
1999 | 0 | Expr::IsNotFalse(expr) => { |
2000 | 0 | write!(f, "{} IS NOT FALSE", SchemaDisplay(expr)) |
2001 | | } |
2002 | | Expr::Like(Like { |
2003 | 0 | negated, |
2004 | 0 | expr, |
2005 | 0 | pattern, |
2006 | 0 | escape_char, |
2007 | 0 | case_insensitive, |
2008 | 0 | }) => { |
2009 | 0 | write!( |
2010 | 0 | f, |
2011 | 0 | "{} {}{} {}", |
2012 | 0 | SchemaDisplay(expr), |
2013 | 0 | if *negated { "NOT " } else { "" }, |
2014 | 0 | if *case_insensitive { "ILIKE" } else { "LIKE" }, |
2015 | 0 | SchemaDisplay(pattern), |
2016 | 0 | )?; |
2017 | | |
2018 | 0 | if let Some(char) = escape_char { |
2019 | 0 | write!(f, " CHAR '{char}'")?; |
2020 | 0 | } |
2021 | | |
2022 | 0 | Ok(()) |
2023 | | } |
2024 | 0 | Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)), |
2025 | 0 | Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)), |
2026 | 0 | Expr::Unnest(Unnest { expr }) => { |
2027 | 0 | write!(f, "UNNEST({})", SchemaDisplay(expr)) |
2028 | | } |
2029 | 0 | Expr::ScalarFunction(ScalarFunction { func, args }) => { |
2030 | 0 | match func.schema_name(args) { |
2031 | 0 | Ok(name) => { |
2032 | 0 | write!(f, "{name}") |
2033 | | } |
2034 | 0 | Err(e) => { |
2035 | 0 | write!(f, "got error from schema_name {}", e) |
2036 | | } |
2037 | | } |
2038 | | } |
2039 | 0 | Expr::ScalarSubquery(Subquery { subquery, .. }) => { |
2040 | 0 | write!(f, "{}", subquery.schema().field(0).name()) |
2041 | | } |
2042 | | Expr::SimilarTo(Like { |
2043 | 0 | negated, |
2044 | 0 | expr, |
2045 | 0 | pattern, |
2046 | 0 | escape_char, |
2047 | 0 | .. |
2048 | 0 | }) => { |
2049 | 0 | write!( |
2050 | 0 | f, |
2051 | 0 | "{} {} {}", |
2052 | 0 | SchemaDisplay(expr), |
2053 | 0 | if *negated { |
2054 | 0 | "NOT SIMILAR TO" |
2055 | | } else { |
2056 | 0 | "SIMILAR TO" |
2057 | | }, |
2058 | 0 | SchemaDisplay(pattern), |
2059 | 0 | )?; |
2060 | 0 | if let Some(char) = escape_char { |
2061 | 0 | write!(f, " CHAR '{char}'")?; |
2062 | 0 | } |
2063 | | |
2064 | 0 | Ok(()) |
2065 | | } |
2066 | | Expr::WindowFunction(WindowFunction { |
2067 | 0 | fun, |
2068 | 0 | args, |
2069 | 0 | partition_by, |
2070 | 0 | order_by, |
2071 | 0 | window_frame, |
2072 | 0 | null_treatment, |
2073 | 0 | }) => { |
2074 | 0 | write!( |
2075 | 0 | f, |
2076 | 0 | "{}({})", |
2077 | 0 | fun, |
2078 | 0 | schema_name_from_exprs_comma_seperated_without_space(args)? |
2079 | 0 | )?; |
2080 | | |
2081 | 0 | if let Some(null_treatment) = null_treatment { |
2082 | 0 | write!(f, " {}", null_treatment)?; |
2083 | 0 | } |
2084 | | |
2085 | 0 | if !partition_by.is_empty() { |
2086 | 0 | write!( |
2087 | 0 | f, |
2088 | 0 | " PARTITION BY [{}]", |
2089 | 0 | schema_name_from_exprs(partition_by)? |
2090 | 0 | )?; |
2091 | 0 | } |
2092 | | |
2093 | 0 | if !order_by.is_empty() { |
2094 | 0 | write!(f, " ORDER BY [{}]", schema_name_from_sorts(order_by)?)?; |
2095 | 0 | }; |
2096 | | |
2097 | 0 | write!(f, " {window_frame}") |
2098 | | } |
2099 | | } |
2100 | 0 | } |
2101 | | } |
2102 | | |
2103 | | /// Get schema_name for Vector of expressions |
2104 | | /// |
2105 | | /// Internal usage. Please call `schema_name_from_exprs` instead |
2106 | | // TODO: Use ", " to standardize the formatting of Vec<Expr>, |
2107 | | // <https://github.com/apache/datafusion/issues/10364> |
2108 | 0 | pub(crate) fn schema_name_from_exprs_comma_seperated_without_space( |
2109 | 0 | exprs: &[Expr], |
2110 | 0 | ) -> Result<String, fmt::Error> { |
2111 | 0 | schema_name_from_exprs_inner(exprs, ",") |
2112 | 0 | } |
2113 | | |
2114 | | /// Get schema_name for Vector of expressions |
2115 | 0 | pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> { |
2116 | 0 | schema_name_from_exprs_inner(exprs, ", ") |
2117 | 0 | } |
2118 | | |
2119 | 0 | fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result<String, fmt::Error> { |
2120 | 0 | let mut s = String::new(); |
2121 | 0 | for (i, e) in exprs.iter().enumerate() { |
2122 | 0 | if i > 0 { |
2123 | 0 | write!(&mut s, "{sep}")?; |
2124 | 0 | } |
2125 | 0 | write!(&mut s, "{}", SchemaDisplay(e))?; |
2126 | | } |
2127 | | |
2128 | 0 | Ok(s) |
2129 | 0 | } |
2130 | | |
2131 | 0 | pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result<String, fmt::Error> { |
2132 | 0 | let mut s = String::new(); |
2133 | 0 | for (i, e) in sorts.iter().enumerate() { |
2134 | 0 | if i > 0 { |
2135 | 0 | write!(&mut s, ", ")?; |
2136 | 0 | } |
2137 | 0 | let ordering = if e.asc { "ASC" } else { "DESC" }; |
2138 | 0 | let nulls_ordering = if e.nulls_first { |
2139 | 0 | "NULLS FIRST" |
2140 | | } else { |
2141 | 0 | "NULLS LAST" |
2142 | | }; |
2143 | 0 | write!(&mut s, "{} {} {}", e.expr, ordering, nulls_ordering)?; |
2144 | | } |
2145 | | |
2146 | 0 | Ok(s) |
2147 | 0 | } |
2148 | | |
2149 | | /// Format expressions for display as part of a logical plan. In many cases, this will produce |
2150 | | /// similar output to `Expr.name()` except that column names will be prefixed with '#'. |
2151 | | impl fmt::Display for Expr { |
2152 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
2153 | 0 | match self { |
2154 | 0 | Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"), |
2155 | 0 | Expr::Column(c) => write!(f, "{c}"), |
2156 | 0 | Expr::OuterReferenceColumn(_, c) => write!(f, "outer_ref({c})"), |
2157 | 0 | Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")), |
2158 | 0 | Expr::Literal(v) => write!(f, "{v:?}"), |
2159 | 0 | Expr::Case(case) => { |
2160 | 0 | write!(f, "CASE ")?; |
2161 | 0 | if let Some(e) = &case.expr { |
2162 | 0 | write!(f, "{e} ")?; |
2163 | 0 | } |
2164 | 0 | for (w, t) in &case.when_then_expr { |
2165 | 0 | write!(f, "WHEN {w} THEN {t} ")?; |
2166 | | } |
2167 | 0 | if let Some(e) = &case.else_expr { |
2168 | 0 | write!(f, "ELSE {e} ")?; |
2169 | 0 | } |
2170 | 0 | write!(f, "END") |
2171 | | } |
2172 | 0 | Expr::Cast(Cast { expr, data_type }) => { |
2173 | 0 | write!(f, "CAST({expr} AS {data_type:?})") |
2174 | | } |
2175 | 0 | Expr::TryCast(TryCast { expr, data_type }) => { |
2176 | 0 | write!(f, "TRY_CAST({expr} AS {data_type:?})") |
2177 | | } |
2178 | 0 | Expr::Not(expr) => write!(f, "NOT {expr}"), |
2179 | 0 | Expr::Negative(expr) => write!(f, "(- {expr})"), |
2180 | 0 | Expr::IsNull(expr) => write!(f, "{expr} IS NULL"), |
2181 | 0 | Expr::IsNotNull(expr) => write!(f, "{expr} IS NOT NULL"), |
2182 | 0 | Expr::IsTrue(expr) => write!(f, "{expr} IS TRUE"), |
2183 | 0 | Expr::IsFalse(expr) => write!(f, "{expr} IS FALSE"), |
2184 | 0 | Expr::IsUnknown(expr) => write!(f, "{expr} IS UNKNOWN"), |
2185 | 0 | Expr::IsNotTrue(expr) => write!(f, "{expr} IS NOT TRUE"), |
2186 | 0 | Expr::IsNotFalse(expr) => write!(f, "{expr} IS NOT FALSE"), |
2187 | 0 | Expr::IsNotUnknown(expr) => write!(f, "{expr} IS NOT UNKNOWN"), |
2188 | | Expr::Exists(Exists { |
2189 | 0 | subquery, |
2190 | 0 | negated: true, |
2191 | 0 | }) => write!(f, "NOT EXISTS ({subquery:?})"), |
2192 | | Expr::Exists(Exists { |
2193 | 0 | subquery, |
2194 | 0 | negated: false, |
2195 | 0 | }) => write!(f, "EXISTS ({subquery:?})"), |
2196 | | Expr::InSubquery(InSubquery { |
2197 | 0 | expr, |
2198 | 0 | subquery, |
2199 | 0 | negated: true, |
2200 | 0 | }) => write!(f, "{expr} NOT IN ({subquery:?})"), |
2201 | | Expr::InSubquery(InSubquery { |
2202 | 0 | expr, |
2203 | 0 | subquery, |
2204 | 0 | negated: false, |
2205 | 0 | }) => write!(f, "{expr} IN ({subquery:?})"), |
2206 | 0 | Expr::ScalarSubquery(subquery) => write!(f, "({subquery:?})"), |
2207 | 0 | Expr::BinaryExpr(expr) => write!(f, "{expr}"), |
2208 | 0 | Expr::ScalarFunction(fun) => { |
2209 | 0 | fmt_function(f, fun.name(), false, &fun.args, true) |
2210 | | } |
2211 | | // TODO: use udf's display_name, need to fix the seperator issue, <https://github.com/apache/datafusion/issues/10364> |
2212 | | // Expr::ScalarFunction(ScalarFunction { func, args }) => { |
2213 | | // write!(f, "{}", func.display_name(args).unwrap()) |
2214 | | // } |
2215 | | Expr::WindowFunction(WindowFunction { |
2216 | 0 | fun, |
2217 | 0 | args, |
2218 | 0 | partition_by, |
2219 | 0 | order_by, |
2220 | 0 | window_frame, |
2221 | 0 | null_treatment, |
2222 | 0 | }) => { |
2223 | 0 | fmt_function(f, &fun.to_string(), false, args, true)?; |
2224 | | |
2225 | 0 | if let Some(nt) = null_treatment { |
2226 | 0 | write!(f, "{}", nt)?; |
2227 | 0 | } |
2228 | | |
2229 | 0 | if !partition_by.is_empty() { |
2230 | 0 | write!(f, " PARTITION BY [{}]", expr_vec_fmt!(partition_by))?; |
2231 | 0 | } |
2232 | 0 | if !order_by.is_empty() { |
2233 | 0 | write!(f, " ORDER BY [{}]", expr_vec_fmt!(order_by))?; |
2234 | 0 | } |
2235 | 0 | write!( |
2236 | 0 | f, |
2237 | 0 | " {} BETWEEN {} AND {}", |
2238 | 0 | window_frame.units, window_frame.start_bound, window_frame.end_bound |
2239 | 0 | )?; |
2240 | 0 | Ok(()) |
2241 | | } |
2242 | | Expr::AggregateFunction(AggregateFunction { |
2243 | 0 | func, |
2244 | 0 | distinct, |
2245 | 0 | ref args, |
2246 | 0 | filter, |
2247 | 0 | order_by, |
2248 | 0 | null_treatment, |
2249 | 0 | .. |
2250 | 0 | }) => { |
2251 | 0 | fmt_function(f, func.name(), *distinct, args, true)?; |
2252 | 0 | if let Some(nt) = null_treatment { |
2253 | 0 | write!(f, " {}", nt)?; |
2254 | 0 | } |
2255 | 0 | if let Some(fe) = filter { |
2256 | 0 | write!(f, " FILTER (WHERE {fe})")?; |
2257 | 0 | } |
2258 | 0 | if let Some(ob) = order_by { |
2259 | 0 | write!(f, " ORDER BY [{}]", expr_vec_fmt!(ob))?; |
2260 | 0 | } |
2261 | 0 | Ok(()) |
2262 | | } |
2263 | | Expr::Between(Between { |
2264 | 0 | expr, |
2265 | 0 | negated, |
2266 | 0 | low, |
2267 | 0 | high, |
2268 | 0 | }) => { |
2269 | 0 | if *negated { |
2270 | 0 | write!(f, "{expr} NOT BETWEEN {low} AND {high}") |
2271 | | } else { |
2272 | 0 | write!(f, "{expr} BETWEEN {low} AND {high}") |
2273 | | } |
2274 | | } |
2275 | | Expr::Like(Like { |
2276 | 0 | negated, |
2277 | 0 | expr, |
2278 | 0 | pattern, |
2279 | 0 | escape_char, |
2280 | 0 | case_insensitive, |
2281 | 0 | }) => { |
2282 | 0 | write!(f, "{expr}")?; |
2283 | 0 | let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" }; |
2284 | 0 | if *negated { |
2285 | 0 | write!(f, " NOT")?; |
2286 | 0 | } |
2287 | 0 | if let Some(char) = escape_char { |
2288 | 0 | write!(f, " {op_name} {pattern} ESCAPE '{char}'") |
2289 | | } else { |
2290 | 0 | write!(f, " {op_name} {pattern}") |
2291 | | } |
2292 | | } |
2293 | | Expr::SimilarTo(Like { |
2294 | 0 | negated, |
2295 | 0 | expr, |
2296 | 0 | pattern, |
2297 | 0 | escape_char, |
2298 | 0 | case_insensitive: _, |
2299 | 0 | }) => { |
2300 | 0 | write!(f, "{expr}")?; |
2301 | 0 | if *negated { |
2302 | 0 | write!(f, " NOT")?; |
2303 | 0 | } |
2304 | 0 | if let Some(char) = escape_char { |
2305 | 0 | write!(f, " SIMILAR TO {pattern} ESCAPE '{char}'") |
2306 | | } else { |
2307 | 0 | write!(f, " SIMILAR TO {pattern}") |
2308 | | } |
2309 | | } |
2310 | | Expr::InList(InList { |
2311 | 0 | expr, |
2312 | 0 | list, |
2313 | 0 | negated, |
2314 | 0 | }) => { |
2315 | 0 | if *negated { |
2316 | 0 | write!(f, "{expr} NOT IN ([{}])", expr_vec_fmt!(list)) |
2317 | | } else { |
2318 | 0 | write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list)) |
2319 | | } |
2320 | | } |
2321 | 0 | Expr::Wildcard { qualifier, options } => match qualifier { |
2322 | 0 | Some(qualifier) => write!(f, "{qualifier}.*{options}"), |
2323 | 0 | None => write!(f, "*{options}"), |
2324 | | }, |
2325 | 0 | Expr::GroupingSet(grouping_sets) => match grouping_sets { |
2326 | 0 | GroupingSet::Rollup(exprs) => { |
2327 | 0 | // ROLLUP (c0, c1, c2) |
2328 | 0 | write!(f, "ROLLUP ({})", expr_vec_fmt!(exprs)) |
2329 | | } |
2330 | 0 | GroupingSet::Cube(exprs) => { |
2331 | 0 | // CUBE (c0, c1, c2) |
2332 | 0 | write!(f, "CUBE ({})", expr_vec_fmt!(exprs)) |
2333 | | } |
2334 | 0 | GroupingSet::GroupingSets(lists_of_exprs) => { |
2335 | 0 | // GROUPING SETS ((c0), (c1, c2), (c3, c4)) |
2336 | 0 | write!( |
2337 | 0 | f, |
2338 | 0 | "GROUPING SETS ({})", |
2339 | 0 | lists_of_exprs |
2340 | 0 | .iter() |
2341 | 0 | .map(|exprs| format!("({})", expr_vec_fmt!(exprs))) |
2342 | 0 | .collect::<Vec<String>>() |
2343 | 0 | .join(", ") |
2344 | 0 | ) |
2345 | | } |
2346 | | }, |
2347 | 0 | Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"), |
2348 | 0 | Expr::Unnest(Unnest { expr }) => { |
2349 | 0 | write!(f, "UNNEST({expr})") |
2350 | | } |
2351 | | } |
2352 | 0 | } |
2353 | | } |
2354 | | |
2355 | 0 | fn fmt_function( |
2356 | 0 | f: &mut fmt::Formatter, |
2357 | 0 | fun: &str, |
2358 | 0 | distinct: bool, |
2359 | 0 | args: &[Expr], |
2360 | 0 | display: bool, |
2361 | 0 | ) -> fmt::Result { |
2362 | 0 | let args: Vec<String> = match display { |
2363 | 0 | true => args.iter().map(|arg| format!("{arg}")).collect(), |
2364 | 0 | false => args.iter().map(|arg| format!("{arg:?}")).collect(), |
2365 | | }; |
2366 | | |
2367 | 0 | let distinct_str = match distinct { |
2368 | 0 | true => "DISTINCT ", |
2369 | 0 | false => "", |
2370 | | }; |
2371 | 0 | write!(f, "{}({}{})", fun, distinct_str, args.join(", ")) |
2372 | 0 | } |
2373 | | |
2374 | | /// The name of the column (field) that this `Expr` will produce in the physical plan. |
2375 | | /// The difference from [Expr::schema_name] is that top-level columns are unqualified. |
2376 | 0 | pub fn physical_name(expr: &Expr) -> Result<String> { |
2377 | 0 | if let Expr::Column(col) = expr { |
2378 | 0 | Ok(col.name.clone()) |
2379 | | } else { |
2380 | 0 | Ok(expr.schema_name().to_string()) |
2381 | | } |
2382 | 0 | } |
2383 | | |
2384 | | #[cfg(test)] |
2385 | | mod test { |
2386 | | use crate::expr_fn::col; |
2387 | | use crate::{ |
2388 | | case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue, |
2389 | | ScalarUDF, ScalarUDFImpl, Volatility, |
2390 | | }; |
2391 | | use sqlparser::ast; |
2392 | | use sqlparser::ast::{Ident, IdentWithAlias}; |
2393 | | use std::any::Any; |
2394 | | |
/// A `CASE <expr> WHEN ... ELSE ... END` expression must render identically
/// through `canonical_name` and through `Display`.
#[test]
#[allow(deprecated)]
fn format_case_when() -> Result<()> {
    let expected = "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END";

    let case_expr = case(col("a"))
        .when(lit(1), lit(true))
        .when(lit(0), lit(false))
        .otherwise(lit(ScalarValue::Null))?;

    assert_eq!(expected, case_expr.canonical_name());
    assert_eq!(expected, case_expr.to_string());
    Ok(())
}
2407 | | |
/// `CAST` shows up in `canonical_name` and `Display`, but not in the schema
/// name of the expression.
#[test]
#[allow(deprecated)]
fn format_cast() -> Result<()> {
    let literal = Expr::Literal(ScalarValue::Float32(Some(1.23)));
    let cast_expr = Expr::Cast(Cast::new(Box::new(literal), DataType::Utf8));

    let expected_canonical = "CAST(Float32(1.23) AS Utf8)";
    assert_eq!(expected_canonical, cast_expr.canonical_name());
    assert_eq!(expected_canonical, cast_expr.to_string());

    // CAST intentionally has a name that differs from its `Display`
    // representation: casting does not change the name of an expression.
    assert_eq!("Float32(1.23)", cast_expr.schema_name().to_string());
    Ok(())
}
2423 | | |
/// Checks that a partial order is defined for `Expr`. This is a smoke test,
/// not an exhaustive check of every possible comparison.
#[test]
fn test_partial_ord() {
    let a_plus_one = col("a") + lit(1);
    let a_plus_two = col("a") + lit(2);
    let not_a_plus_two = !(col("a") + lit(2));

    assert!(a_plus_one < a_plus_two);
    assert!(not_a_plus_two > a_plus_two);
    assert!(a_plus_one < not_a_plus_two);
}
2436 | | |
/// `column_refs` must report every distinct column referenced by an
/// expression.
#[test]
fn test_collect_expr() -> Result<()> {
    // single column, wrapped in a cast
    let cast_of_a = Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64));
    let found = cast_of_a.column_refs();
    assert_eq!(1, found.len());
    assert!(found.contains(&Column::from_name("a")));

    // multiple columns mixed with a literal
    let sum = col("a") + col("b") + lit(1);
    let found = sum.column_refs();
    assert_eq!(2, found.len());
    for name in ["a", "b"] {
        assert!(found.contains(&Column::from_name(name)));
    }

    Ok(())
}
2458 | | |
/// Comparison and boolean operators built through the fluent API must be
/// displayed with the expected SQL-like operator symbol.
#[test]
fn test_logical_ops() {
    let cases = [
        (lit(1u32).eq(lit(2u32)), "UInt32(1) = UInt32(2)"),
        (lit(1u32).not_eq(lit(2u32)), "UInt32(1) != UInt32(2)"),
        (lit(1u32).gt(lit(2u32)), "UInt32(1) > UInt32(2)"),
        (lit(1u32).gt_eq(lit(2u32)), "UInt32(1) >= UInt32(2)"),
        (lit(1u32).lt(lit(2u32)), "UInt32(1) < UInt32(2)"),
        (lit(1u32).lt_eq(lit(2u32)), "UInt32(1) <= UInt32(2)"),
        (lit(1u32).and(lit(2u32)), "UInt32(1) AND UInt32(2)"),
        (lit(1u32).or(lit(2u32)), "UInt32(1) OR UInt32(2)"),
    ];

    for (expr, expected) in cases {
        assert_eq!(format!("{}", expr), expected);
    }
}
2494 | | |
/// Checks that the volatility given to a UDF's signature is the one reported
/// back through `ScalarUDF::signature`.
#[test]
fn test_is_volatile_scalar_func() {
    // Minimal UDF whose only configurable part is its signature
    #[derive(Debug)]
    struct TestScalarUDF {
        signature: Signature,
    }
    impl ScalarUDFImpl for TestScalarUDF {
        fn as_any(&self) -> &dyn Any {
            self
        }
        fn name(&self) -> &str {
            "TestScalarUDF"
        }

        fn signature(&self) -> &Signature {
            &self.signature
        }

        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
            Ok(DataType::Utf8)
        }

        fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
            Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
        }
    }

    // Builds the UDF with a one-argument Float32 signature of the requested
    // volatility.
    let build_udf = |volatility: Volatility| {
        Arc::new(ScalarUDF::from(TestScalarUDF {
            signature: Signature::uniform(1, vec![DataType::Float32], volatility),
        }))
    };

    let stable_udf = build_udf(Volatility::Stable);
    assert_ne!(stable_udf.signature().volatility, Volatility::Volatile);

    let volatile_udf = build_udf(Volatility::Volatile);
    assert_eq!(volatile_udf.signature().volatility, Volatility::Volatile);
}
2536 | | |
2537 | | use super::*; |
2538 | | |
/// `first_value` must return the data type of its argument.
#[test]
fn test_first_value_return_type() -> Result<()> {
    let fun = find_df_window_func("first_value").unwrap();

    for input_type in [DataType::Utf8, DataType::UInt64] {
        let observed = fun.return_type(&[input_type.clone()], &[true], "")?;
        assert_eq!(input_type, observed);
    }

    Ok(())
}
2550 | | |
2551 | | #[test] |
2552 | | fn test_last_value_return_type() -> Result<()> { |
2553 | | let fun = find_df_window_func("last_value").unwrap(); |
2554 | | let observed = fun.return_type(&[DataType::Utf8], &[true], "")?; |
2555 | | assert_eq!(DataType::Utf8, observed); |
2556 | | |
2557 | | let observed = fun.return_type(&[DataType::Float64], &[true], "")?; |
2558 | | assert_eq!(DataType::Float64, observed); |
2559 | | |
2560 | | Ok(()) |
2561 | | } |
2562 | | |
2563 | | #[test] |
2564 | | fn test_lead_return_type() -> Result<()> { |
2565 | | let fun = find_df_window_func("lead").unwrap(); |
2566 | | let observed = fun.return_type(&[DataType::Utf8], &[true], "")?; |
2567 | | assert_eq!(DataType::Utf8, observed); |
2568 | | |
2569 | | let observed = fun.return_type(&[DataType::Float64], &[true], "")?; |
2570 | | assert_eq!(DataType::Float64, observed); |
2571 | | |
2572 | | Ok(()) |
2573 | | } |
2574 | | |
2575 | | #[test] |
2576 | | fn test_lag_return_type() -> Result<()> { |
2577 | | let fun = find_df_window_func("lag").unwrap(); |
2578 | | let observed = fun.return_type(&[DataType::Utf8], &[true], "")?; |
2579 | | assert_eq!(DataType::Utf8, observed); |
2580 | | |
2581 | | let observed = fun.return_type(&[DataType::Float64], &[true], "")?; |
2582 | | assert_eq!(DataType::Float64, observed); |
2583 | | |
2584 | | Ok(()) |
2585 | | } |
2586 | | |
2587 | | #[test] |
2588 | | fn test_nth_value_return_type() -> Result<()> { |
2589 | | let fun = find_df_window_func("nth_value").unwrap(); |
2590 | | let observed = |
2591 | | fun.return_type(&[DataType::Utf8, DataType::UInt64], &[true, true], "")?; |
2592 | | assert_eq!(DataType::Utf8, observed); |
2593 | | |
2594 | | let observed = |
2595 | | fun.return_type(&[DataType::Float64, DataType::UInt64], &[true, true], "")?; |
2596 | | assert_eq!(DataType::Float64, observed); |
2597 | | |
2598 | | Ok(()) |
2599 | | } |
2600 | | |
2601 | | #[test] |
2602 | | fn test_percent_rank_return_type() -> Result<()> { |
2603 | | let fun = find_df_window_func("percent_rank").unwrap(); |
2604 | | let observed = fun.return_type(&[], &[], "")?; |
2605 | | assert_eq!(DataType::Float64, observed); |
2606 | | |
2607 | | Ok(()) |
2608 | | } |
2609 | | |
2610 | | #[test] |
2611 | | fn test_cume_dist_return_type() -> Result<()> { |
2612 | | let fun = find_df_window_func("cume_dist").unwrap(); |
2613 | | let observed = fun.return_type(&[], &[], "")?; |
2614 | | assert_eq!(DataType::Float64, observed); |
2615 | | |
2616 | | Ok(()) |
2617 | | } |
2618 | | |
2619 | | #[test] |
2620 | | fn test_ntile_return_type() -> Result<()> { |
2621 | | let fun = find_df_window_func("ntile").unwrap(); |
2622 | | let observed = fun.return_type(&[DataType::Int16], &[true], "")?; |
2623 | | assert_eq!(DataType::UInt64, observed); |
2624 | | |
2625 | | Ok(()) |
2626 | | } |
2627 | | |
2628 | | #[test] |
2629 | | fn test_window_function_case_insensitive() -> Result<()> { |
2630 | | let names = vec![ |
2631 | | "rank", |
2632 | | "dense_rank", |
2633 | | "percent_rank", |
2634 | | "cume_dist", |
2635 | | "ntile", |
2636 | | "lag", |
2637 | | "lead", |
2638 | | "first_value", |
2639 | | "last_value", |
2640 | | "nth_value", |
2641 | | ]; |
2642 | | for name in names { |
2643 | | let fun = find_df_window_func(name).unwrap(); |
2644 | | let fun2 = find_df_window_func(name.to_uppercase().as_str()).unwrap(); |
2645 | | assert_eq!(fun, fun2); |
2646 | | if fun.to_string() == "first_value" || fun.to_string() == "last_value" { |
2647 | | assert_eq!(fun.to_string(), name); |
2648 | | } else { |
2649 | | assert_eq!(fun.to_string(), name.to_uppercase()); |
2650 | | } |
2651 | | } |
2652 | | Ok(()) |
2653 | | } |
2654 | | |
2655 | | #[test] |
2656 | | fn test_find_df_window_function() { |
2657 | | assert_eq!( |
2658 | | find_df_window_func("cume_dist"), |
2659 | | Some(WindowFunctionDefinition::BuiltInWindowFunction( |
2660 | | built_in_window_function::BuiltInWindowFunction::CumeDist |
2661 | | )) |
2662 | | ); |
2663 | | assert_eq!( |
2664 | | find_df_window_func("first_value"), |
2665 | | Some(WindowFunctionDefinition::BuiltInWindowFunction( |
2666 | | built_in_window_function::BuiltInWindowFunction::FirstValue |
2667 | | )) |
2668 | | ); |
2669 | | assert_eq!( |
2670 | | find_df_window_func("LAST_value"), |
2671 | | Some(WindowFunctionDefinition::BuiltInWindowFunction( |
2672 | | built_in_window_function::BuiltInWindowFunction::LastValue |
2673 | | )) |
2674 | | ); |
2675 | | assert_eq!( |
2676 | | find_df_window_func("LAG"), |
2677 | | Some(WindowFunctionDefinition::BuiltInWindowFunction( |
2678 | | built_in_window_function::BuiltInWindowFunction::Lag |
2679 | | )) |
2680 | | ); |
2681 | | assert_eq!( |
2682 | | find_df_window_func("LEAD"), |
2683 | | Some(WindowFunctionDefinition::BuiltInWindowFunction( |
2684 | | built_in_window_function::BuiltInWindowFunction::Lead |
2685 | | )) |
2686 | | ); |
2687 | | assert_eq!(find_df_window_func("not_exist"), None) |
2688 | | } |
2689 | | |
2690 | | #[test] |
2691 | | fn test_display_wildcard() { |
2692 | | assert_eq!(format!("{}", wildcard()), "*"); |
2693 | | assert_eq!(format!("{}", qualified_wildcard("t1")), "t1.*"); |
2694 | | assert_eq!( |
2695 | | format!( |
2696 | | "{}", |
2697 | | wildcard_with_options(wildcard_options( |
2698 | | Some(IlikeSelectItem { |
2699 | | pattern: "c1".to_string() |
2700 | | }), |
2701 | | None, |
2702 | | None, |
2703 | | None, |
2704 | | None |
2705 | | )) |
2706 | | ), |
2707 | | "* ILIKE 'c1'" |
2708 | | ); |
2709 | | assert_eq!( |
2710 | | format!( |
2711 | | "{}", |
2712 | | wildcard_with_options(wildcard_options( |
2713 | | None, |
2714 | | Some(ExcludeSelectItem::Multiple(vec![ |
2715 | | Ident::from("c1"), |
2716 | | Ident::from("c2") |
2717 | | ])), |
2718 | | None, |
2719 | | None, |
2720 | | None |
2721 | | )) |
2722 | | ), |
2723 | | "* EXCLUDE (c1, c2)" |
2724 | | ); |
2725 | | assert_eq!( |
2726 | | format!( |
2727 | | "{}", |
2728 | | wildcard_with_options(wildcard_options( |
2729 | | None, |
2730 | | None, |
2731 | | Some(ExceptSelectItem { |
2732 | | first_element: Ident::from("c1"), |
2733 | | additional_elements: vec![Ident::from("c2")] |
2734 | | }), |
2735 | | None, |
2736 | | None |
2737 | | )) |
2738 | | ), |
2739 | | "* EXCEPT (c1, c2)" |
2740 | | ); |
2741 | | assert_eq!( |
2742 | | format!( |
2743 | | "{}", |
2744 | | wildcard_with_options(wildcard_options( |
2745 | | None, |
2746 | | None, |
2747 | | None, |
2748 | | Some(PlannedReplaceSelectItem { |
2749 | | items: vec![ReplaceSelectElement { |
2750 | | expr: ast::Expr::Identifier(Ident::from("c1")), |
2751 | | column_name: Ident::from("a1"), |
2752 | | as_keyword: false |
2753 | | }], |
2754 | | planned_expressions: vec![] |
2755 | | }), |
2756 | | None |
2757 | | )) |
2758 | | ), |
2759 | | "* REPLACE (c1 a1)" |
2760 | | ); |
2761 | | assert_eq!( |
2762 | | format!( |
2763 | | "{}", |
2764 | | wildcard_with_options(wildcard_options( |
2765 | | None, |
2766 | | None, |
2767 | | None, |
2768 | | None, |
2769 | | Some(RenameSelectItem::Multiple(vec![IdentWithAlias { |
2770 | | ident: Ident::from("c1"), |
2771 | | alias: Ident::from("a1") |
2772 | | }])) |
2773 | | )) |
2774 | | ), |
2775 | | "* RENAME (c1 AS a1)" |
2776 | | ) |
2777 | | } |
2778 | | |
2779 | | fn wildcard_options( |
2780 | | opt_ilike: Option<IlikeSelectItem>, |
2781 | | opt_exclude: Option<ExcludeSelectItem>, |
2782 | | opt_except: Option<ExceptSelectItem>, |
2783 | | opt_replace: Option<PlannedReplaceSelectItem>, |
2784 | | opt_rename: Option<RenameSelectItem>, |
2785 | | ) -> WildcardOptions { |
2786 | | WildcardOptions { |
2787 | | ilike: opt_ilike, |
2788 | | exclude: opt_exclude, |
2789 | | except: opt_except, |
2790 | | replace: opt_replace, |
2791 | | rename: opt_rename, |
2792 | | } |
2793 | | } |
2794 | | } |