Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/expr/src/expr.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Logical Expressions: [`Expr`]
19
20
use std::collections::{HashMap, HashSet};
21
use std::fmt::{self, Display, Formatter, Write};
22
use std::hash::{Hash, Hasher};
23
use std::mem;
24
use std::str::FromStr;
25
use std::sync::Arc;
26
27
use crate::expr_fn::binary_expr;
28
use crate::logical_plan::Subquery;
29
use crate::utils::expr_to_columns;
30
use crate::Volatility;
31
use crate::{
32
    built_in_window_function, udaf, BuiltInWindowFunction, ExprSchemable, Operator,
33
    Signature, WindowFrame, WindowUDF,
34
};
35
36
use arrow::datatypes::{DataType, FieldRef};
37
use datafusion_common::tree_node::{
38
    Transformed, TransformedResult, TreeNode, TreeNodeRecursion,
39
};
40
use datafusion_common::{
41
    plan_err, Column, DFSchema, Result, ScalarValue, TableReference,
42
};
43
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
44
use sqlparser::ast::{
45
    display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem,
46
    NullTreatment, RenameSelectItem, ReplaceSelectElement,
47
};
48
49
/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
50
///
51
/// For example the expression `A + 1` will be represented as
52
///
53
///```text
54
///  BinaryExpr {
55
///    left: Expr::Column("A"),
56
///    op: Operator::Plus,
57
///    right: Expr::Literal(ScalarValue::Int32(Some(1)))
58
/// }
59
/// ```
60
///
61
/// # Creating Expressions
62
///
63
/// `Expr`s can be created directly, but it is often easier and less verbose to
64
/// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or
65
/// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`].
66
///
67
/// See also [`ExprFunctionExt`] for creating aggregate and window functions.
68
///
69
/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
70
///
71
/// # Schema Access
72
///
73
/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
74
/// of an `Expr`.
75
///
76
/// # Visiting and Rewriting `Expr`s
77
///
78
/// The `Expr` struct implements the [`TreeNode`] trait for walking and
79
/// rewriting expressions. For example [`TreeNode::apply`] recursively visits an
80
/// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See
81
/// the examples below and [`TreeNode`] for more information.
82
///
83
/// # Examples
84
///
85
/// ## Column references and literals
86
///
87
/// [`Expr::Column`] refer to the values of columns and are often created with
88
/// the [`col`] function. For example to create an expression `c1` referring to
89
/// column named "c1":
90
///
91
/// [`col`]: crate::expr_fn::col
92
///
93
/// ```
94
/// # use datafusion_common::Column;
95
/// # use datafusion_expr::{lit, col, Expr};
96
/// let expr = col("c1");
97
/// assert_eq!(expr, Expr::Column(Column::from_name("c1")));
98
/// ```
99
///
100
/// [`Expr::Literal`] refer to literal, or constant, values. These are created
101
/// with the [`lit`] function. For example to create an expression `42`:
102
///
103
/// [`lit`]: crate::lit
104
///
105
/// ```
106
/// # use datafusion_common::{Column, ScalarValue};
107
/// # use datafusion_expr::{lit, col, Expr};
108
/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
109
/// let expr = lit(42i64);
110
/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42))));
111
/// // To make a (typed) NULL:
112
/// let expr = Expr::Literal(ScalarValue::Int64(None));
113
/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type):
114
/// let expr = lit(ScalarValue::Null);
115
/// ```
116
///
117
/// ## Binary Expressions
118
///
119
/// Exprs implement traits that allow easy to understand construction of more
120
/// complex expressions. For example, to create `c1 + c2` to add columns "c1" and
121
/// "c2" together
122
///
123
/// ```
124
/// # use datafusion_expr::{lit, col, Operator, Expr};
125
/// // Use the `+` operator to add two columns together
126
/// let expr = col("c1") + col("c2");
127
/// assert!(matches!(expr, Expr::BinaryExpr { ..} ));
128
/// if let Expr::BinaryExpr(binary_expr) = expr {
129
///   assert_eq!(*binary_expr.left, col("c1"));
130
///   assert_eq!(*binary_expr.right, col("c2"));
131
///   assert_eq!(binary_expr.op, Operator::Plus);
132
/// }
133
/// ```
134
///
135
/// The expression `c1 = 42` compares the value in column "c1" to the
136
/// literal value `42`:
137
///
138
/// ```
139
/// # use datafusion_common::ScalarValue;
140
/// # use datafusion_expr::{lit, col, Operator, Expr};
141
/// let expr = col("c1").eq(lit(42_i32));
142
/// assert!(matches!(expr, Expr::BinaryExpr { .. } ));
143
/// if let Expr::BinaryExpr(binary_expr) = expr {
144
///   assert_eq!(*binary_expr.left, col("c1"));
145
///   let scalar = ScalarValue::Int32(Some(42));
146
///   assert_eq!(*binary_expr.right, Expr::Literal(scalar));
147
///   assert_eq!(binary_expr.op, Operator::Eq);
148
/// }
149
/// ```
150
///
151
/// Here is how to implement the equivalent of `SELECT *` to select all
152
/// [`Expr::Column`] from a [`DFSchema`]'s columns:
153
///
154
/// ```
155
/// # use arrow::datatypes::{DataType, Field, Schema};
156
/// # use datafusion_common::{DFSchema, Column};
157
/// # use datafusion_expr::Expr;
158
/// // Create a schema c1(int, c2 float)
159
/// let arrow_schema = Schema::new(vec![
160
///    Field::new("c1", DataType::Int32, false),
161
///    Field::new("c2", DataType::Float64, false),
162
/// ]);
163
/// // DFSchema is an Arrow schema with optional relation name
164
/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema)
165
///   .unwrap();
166
///
167
/// // Form Vec<Expr> with an expression for each column in the schema
168
/// let exprs: Vec<_> = df_schema.iter()
169
///   .map(Expr::from)
170
///   .collect();
171
///
172
/// assert_eq!(exprs, vec![
173
///   Expr::from(Column::from_qualified_name("t1.c1")),
174
///   Expr::from(Column::from_qualified_name("t1.c2")),
175
/// ]);
176
/// ```
177
///
178
/// # Visiting and Rewriting `Expr`s
179
///
180
/// Here is an example that finds all literals in an `Expr` tree:
181
/// ```
182
/// # use std::collections::{HashSet};
183
/// use datafusion_common::ScalarValue;
184
/// # use datafusion_expr::{col, Expr, lit};
185
/// use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
186
/// // Expression a = 5 AND b = 6
187
/// let expr = col("a").eq(lit(5)) & col("b").eq(lit(6));
188
/// // find all literals in a HashSet
189
/// let mut scalars = HashSet::new();
190
/// // apply recursively visits all nodes in the expression tree
191
/// expr.apply(|e| {
192
///    if let Expr::Literal(scalar) = e {
193
///       scalars.insert(scalar);
194
///    }
195
///    // The return value controls whether to continue visiting the tree
196
///    Ok(TreeNodeRecursion::Continue)
197
/// }).unwrap();
198
/// // All subtrees have been visited and literals found
199
/// assert_eq!(scalars.len(), 2);
200
/// assert!(scalars.contains(&ScalarValue::Int32(Some(5))));
201
/// assert!(scalars.contains(&ScalarValue::Int32(Some(6))));
202
/// ```
203
///
204
/// Rewrite an expression, replacing references to column "a" with
205
/// the literal `42`:
206
///
207
///  ```
208
/// # use datafusion_common::tree_node::{Transformed, TreeNode};
209
/// # use datafusion_expr::{col, Expr, lit};
210
/// // expression a = 5 AND b = 6
211
/// let expr = col("a").eq(lit(5)).and(col("b").eq(lit(6)));
212
/// // rewrite all references to column "a" to the literal 42
213
/// let rewritten = expr.transform(|e| {
214
///   if let Expr::Column(c) = &e {
215
///     if &c.name == "a" {
216
///       // return Transformed::yes to indicate the node was changed
217
///       return Ok(Transformed::yes(lit(42)))
218
///     }
219
///   }
220
///   // return Transformed::no to indicate the node was not changed
221
///   Ok(Transformed::no(e))
222
/// }).unwrap();
223
/// // The expression has been rewritten
224
/// assert!(rewritten.transformed);
225
/// // to 42 = 5 AND b = 6
226
/// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6))));
227
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
228
pub enum Expr {
229
    /// An expression with a specific name.
230
    Alias(Alias),
231
    /// A named reference to a qualified field in a schema.
232
    Column(Column),
233
    /// A named reference to a variable in a registry.
234
    ScalarVariable(DataType, Vec<String>),
235
    /// A constant value.
236
    Literal(ScalarValue),
237
    /// A binary expression such as "age > 21"
238
    BinaryExpr(BinaryExpr),
239
    /// LIKE expression
240
    Like(Like),
241
    /// LIKE expression that uses regular expressions
242
    SimilarTo(Like),
243
    /// Negation of an expression. The expression's type must be a boolean to make sense.
244
    Not(Box<Expr>),
245
    /// True if argument is not NULL, false otherwise. This expression itself is never NULL.
246
    IsNotNull(Box<Expr>),
247
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
248
    IsNull(Box<Expr>),
249
    /// True if argument is true, false otherwise. This expression itself is never NULL.
250
    IsTrue(Box<Expr>),
251
    /// True if argument is FALSE, false otherwise. This expression itself is never NULL.
252
    IsFalse(Box<Expr>),
253
    /// True if argument is NULL, false otherwise. This expression itself is never NULL.
254
    IsUnknown(Box<Expr>),
255
    /// True if argument is FALSE or NULL, false otherwise. This expression itself is never NULL.
256
    IsNotTrue(Box<Expr>),
257
    /// True if argument is TRUE OR NULL, false otherwise. This expression itself is never NULL.
258
    IsNotFalse(Box<Expr>),
259
    /// True if argument is TRUE or FALSE, false otherwise. This expression itself is never NULL.
260
    IsNotUnknown(Box<Expr>),
261
    /// arithmetic negation of an expression, the operand must be of a signed numeric data type
262
    Negative(Box<Expr>),
263
    /// Whether an expression is between a given range.
264
    Between(Between),
265
    /// The CASE expression is similar to a series of nested if/else and there are two forms that
266
    /// can be used. The first form consists of a series of boolean "when" expressions with
267
    /// corresponding "then" expressions, and an optional "else" expression.
268
    ///
269
    /// ```text
270
    /// CASE WHEN condition THEN result
271
    ///      [WHEN ...]
272
    ///      [ELSE result]
273
    /// END
274
    /// ```
275
    ///
276
    /// The second form uses a base expression and then a series of "when" clauses that match on a
277
    /// literal value.
278
    ///
279
    /// ```text
280
    /// CASE expression
281
    ///     WHEN value THEN result
282
    ///     [WHEN ...]
283
    ///     [ELSE result]
284
    /// END
285
    /// ```
286
    Case(Case),
287
    /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast.
288
    /// This expression is guaranteed to have a fixed type.
289
    Cast(Cast),
290
    /// Casts the expression to a given type and will return a null value if the expression cannot be cast.
291
    /// This expression is guaranteed to have a fixed type.
292
    TryCast(TryCast),
293
    /// Represents the call of a scalar function with a set of arguments.
294
    ScalarFunction(ScalarFunction),
295
    /// Calls an aggregate function with arguments, and optional
296
    /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`.
297
    ///
298
    /// See also [`ExprFunctionExt`] to set these fields.
299
    ///
300
    /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
301
    AggregateFunction(AggregateFunction),
302
    /// Represents the call of a window function with arguments.
303
    WindowFunction(WindowFunction),
304
    /// Returns whether the list contains the expr value.
305
    InList(InList),
306
    /// EXISTS subquery
307
    Exists(Exists),
308
    /// IN subquery
309
    InSubquery(InSubquery),
310
    /// Scalar subquery
311
    ScalarSubquery(Subquery),
312
    /// Represents a reference to all available fields in a specific schema,
313
    /// with an optional (schema) qualifier.
314
    ///
315
    /// This expr has to be resolved to a list of columns before translating logical
316
    /// plan into physical plan.
317
    Wildcard {
318
        qualifier: Option<TableReference>,
319
        options: WildcardOptions,
320
    },
321
    /// List of grouping set expressions. Only valid in the context of an aggregate
322
    /// GROUP BY expression list
323
    GroupingSet(GroupingSet),
324
    /// A place holder for parameters in a prepared statement
325
    /// (e.g. `$foo` or `$1`)
326
    Placeholder(Placeholder),
327
    /// A place holder which hold a reference to a qualified field
328
    /// in the outer query, used for correlated sub queries.
329
    OuterReferenceColumn(DataType, Column),
330
    /// Unnest expression
331
    Unnest(Unnest),
332
}
333
334
impl Default for Expr {
335
0
    fn default() -> Self {
336
0
        Expr::Literal(ScalarValue::Null)
337
0
    }
338
}
339
340
/// Create an [`Expr`] from a [`Column`]
341
impl From<Column> for Expr {
342
0
    fn from(value: Column) -> Self {
343
0
        Expr::Column(value)
344
0
    }
345
}
346
347
/// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is
348
/// useful for creating [`Expr`] from a [`DFSchema`].
349
///
350
/// See example on [`Expr`]
351
impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr {
352
0
    fn from(value: (Option<&'a TableReference>, &'a FieldRef)) -> Self {
353
0
        Expr::from(Column::from(value))
354
0
    }
355
}
356
357
/// UNNEST expression.
358
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
359
pub struct Unnest {
360
    pub expr: Box<Expr>,
361
}
362
363
impl Unnest {
364
    /// Create a new Unnest expression, boxing the given `expr`.
365
0
    pub fn new(expr: Expr) -> Self {
366
0
        Self {
367
0
            expr: Box::new(expr),
368
0
        }
369
0
    }
370
371
    /// Create a new Unnest expression from an already boxed `Expr`.
372
0
    pub fn new_boxed(boxed: Box<Expr>) -> Self {
373
0
        Self { expr: boxed }
374
0
    }
375
}
376
377
/// Alias expression
378
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
379
pub struct Alias {
380
    pub expr: Box<Expr>,
381
    pub relation: Option<TableReference>,
382
    pub name: String,
383
}
384
385
impl Alias {
386
    /// Create an alias with an optional schema/field qualifier.
387
0
    pub fn new(
388
0
        expr: Expr,
389
0
        relation: Option<impl Into<TableReference>>,
390
0
        name: impl Into<String>,
391
0
    ) -> Self {
392
0
        Self {
393
0
            expr: Box::new(expr),
394
0
            relation: relation.map(|r| r.into()),
395
0
            name: name.into(),
396
0
        }
397
0
    }
398
}
399
400
/// Binary expression
401
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
402
pub struct BinaryExpr {
403
    /// Left-hand side of the expression
404
    pub left: Box<Expr>,
405
    /// The comparison operator
406
    pub op: Operator,
407
    /// Right-hand side of the expression
408
    pub right: Box<Expr>,
409
}
410
411
impl BinaryExpr {
412
    /// Create a new binary expression
413
0
    pub fn new(left: Box<Expr>, op: Operator, right: Box<Expr>) -> Self {
414
0
        Self { left, op, right }
415
0
    }
416
}
417
418
impl Display for BinaryExpr {
419
0
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
420
        // Put parentheses around child binary expressions so that we can see the difference
421
        // between `(a OR b) AND c` and `a OR (b AND c)`. We only insert parentheses when needed,
422
        // based on operator precedence. For example, `(a AND b) OR c` and `a AND b OR c` are
423
        // equivalent and the parentheses are not necessary.
424
425
0
        fn write_child(
426
0
            f: &mut Formatter<'_>,
427
0
            expr: &Expr,
428
0
            precedence: u8,
429
0
        ) -> fmt::Result {
430
0
            match expr {
431
0
                Expr::BinaryExpr(child) => {
432
0
                    let p = child.op.precedence();
433
0
                    if p == 0 || p < precedence {
434
0
                        write!(f, "({child})")?;
435
                    } else {
436
0
                        write!(f, "{child}")?;
437
                    }
438
                }
439
0
                _ => write!(f, "{expr}")?,
440
            }
441
0
            Ok(())
442
0
        }
443
444
0
        let precedence = self.op.precedence();
445
0
        write_child(f, self.left.as_ref(), precedence)?;
446
0
        write!(f, " {} ", self.op)?;
447
0
        write_child(f, self.right.as_ref(), precedence)
448
0
    }
449
}
450
451
/// CASE expression
452
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
453
pub struct Case {
454
    /// Optional base expression that can be compared to literal values in the "when" expressions
455
    pub expr: Option<Box<Expr>>,
456
    /// One or more when/then expressions
457
    pub when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
458
    /// Optional "else" expression
459
    pub else_expr: Option<Box<Expr>>,
460
}
461
462
impl Case {
463
    /// Create a new Case expression
464
0
    pub fn new(
465
0
        expr: Option<Box<Expr>>,
466
0
        when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
467
0
        else_expr: Option<Box<Expr>>,
468
0
    ) -> Self {
469
0
        Self {
470
0
            expr,
471
0
            when_then_expr,
472
0
            else_expr,
473
0
        }
474
0
    }
475
}
476
477
/// LIKE expression
478
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
479
pub struct Like {
480
    pub negated: bool,
481
    pub expr: Box<Expr>,
482
    pub pattern: Box<Expr>,
483
    pub escape_char: Option<char>,
484
    /// Whether to ignore case on comparing
485
    pub case_insensitive: bool,
486
}
487
488
impl Like {
489
    /// Create a new Like expression
490
0
    pub fn new(
491
0
        negated: bool,
492
0
        expr: Box<Expr>,
493
0
        pattern: Box<Expr>,
494
0
        escape_char: Option<char>,
495
0
        case_insensitive: bool,
496
0
    ) -> Self {
497
0
        Self {
498
0
            negated,
499
0
            expr,
500
0
            pattern,
501
0
            escape_char,
502
0
            case_insensitive,
503
0
        }
504
0
    }
505
}
506
507
/// BETWEEN expression
508
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
509
pub struct Between {
510
    /// The value to compare
511
    pub expr: Box<Expr>,
512
    /// Whether the expression is negated
513
    pub negated: bool,
514
    /// The low end of the range
515
    pub low: Box<Expr>,
516
    /// The high end of the range
517
    pub high: Box<Expr>,
518
}
519
520
impl Between {
521
    /// Create a new Between expression
522
0
    pub fn new(expr: Box<Expr>, negated: bool, low: Box<Expr>, high: Box<Expr>) -> Self {
523
0
        Self {
524
0
            expr,
525
0
            negated,
526
0
            low,
527
0
            high,
528
0
        }
529
0
    }
530
}
531
532
/// ScalarFunction expression invokes a scalar function ([`crate::ScalarUDF`])
533
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
534
pub struct ScalarFunction {
535
    /// The function
536
    pub func: Arc<crate::ScalarUDF>,
537
    /// List of expressions to feed to the functions as arguments
538
    pub args: Vec<Expr>,
539
}
540
541
impl ScalarFunction {
542
    /// Returns the function's name, as reported by the underlying `func`.
543
0
    pub fn name(&self) -> &str {
544
0
        self.func.name()
545
0
    }
546
}
547
548
impl ScalarFunction {
549
    /// Create a new ScalarFunction expression with a user-defined function (UDF)
550
0
    pub fn new_udf(udf: Arc<crate::ScalarUDF>, args: Vec<Expr>) -> Self {
551
0
        Self { func: udf, args }
552
0
    }
553
}
554
555
/// Access a sub field of a nested type, such as `Field` or `List`
556
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
557
pub enum GetFieldAccess {
558
    /// Named field, for example `struct["name"]`
559
    NamedStructField { name: ScalarValue },
560
    /// Single list index, for example: `list[i]`
561
    ListIndex { key: Box<Expr> },
562
    /// List stride, for example `list[i:j:k]`
563
    ListRange {
564
        start: Box<Expr>,
565
        stop: Box<Expr>,
566
        stride: Box<Expr>,
567
    },
568
}
569
570
/// Cast expression
571
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
572
pub struct Cast {
573
    /// The expression being cast
574
    pub expr: Box<Expr>,
575
    /// The `DataType` the expression will yield
576
    pub data_type: DataType,
577
}
578
579
impl Cast {
580
    /// Create a new Cast expression
581
0
    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
582
0
        Self { expr, data_type }
583
0
    }
584
}
585
586
/// TryCast Expression
587
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
588
pub struct TryCast {
589
    /// The expression being cast
590
    pub expr: Box<Expr>,
591
    /// The `DataType` the expression will yield
592
    pub data_type: DataType,
593
}
594
595
impl TryCast {
596
    /// Create a new TryCast expression
597
0
    pub fn new(expr: Box<Expr>, data_type: DataType) -> Self {
598
0
        Self { expr, data_type }
599
0
    }
600
}
601
602
/// SORT expression
603
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
604
pub struct Sort {
605
    /// The expression to sort on
606
    pub expr: Expr,
607
    /// The direction of the sort
608
    pub asc: bool,
609
    /// Whether to put Nulls before all other data values
610
    pub nulls_first: bool,
611
}
612
613
impl Sort {
614
    /// Create a new Sort expression on `expr` with the given direction and null ordering
615
0
    pub fn new(expr: Expr, asc: bool, nulls_first: bool) -> Self {
616
0
        Self {
617
0
            expr,
618
0
            asc,
619
0
            nulls_first,
620
0
        }
621
0
    }
622
623
    /// Create a new Sort expression on a clone of `expr` with both `asc` and `nulls_first` inverted
624
0
    pub fn reverse(&self) -> Self {
625
0
        Self {
626
0
            expr: self.expr.clone(),
627
0
            asc: !self.asc,
628
0
            nulls_first: !self.nulls_first,
629
0
        }
630
0
    }
631
}
632
633
impl Display for Sort {
634
0
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
635
0
        write!(f, "{}", self.expr)?;
636
0
        if self.asc {
637
0
            write!(f, " ASC")?;
638
        } else {
639
0
            write!(f, " DESC")?;
640
        }
641
0
        if self.nulls_first {
642
0
            write!(f, " NULLS FIRST")?;
643
        } else {
644
0
            write!(f, " NULLS LAST")?;
645
        }
646
0
        Ok(())
647
0
    }
648
}
649
650
/// Aggregate function
651
///
652
/// See also  [`ExprFunctionExt`] to set these fields on `Expr`
653
///
654
/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
655
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
656
pub struct AggregateFunction {
657
    /// The aggregate function (UDAF) being invoked
658
    pub func: Arc<crate::AggregateUDF>,
659
    /// List of expressions to feed to the functions as arguments
660
    pub args: Vec<Expr>,
661
    /// Whether this is a DISTINCT aggregation or not
662
    pub distinct: bool,
663
    /// Optional filter
664
    pub filter: Option<Box<Expr>>,
665
    /// Optional ordering
666
    pub order_by: Option<Vec<Sort>>,
667
    pub null_treatment: Option<NullTreatment>,
668
}
669
670
impl AggregateFunction {
671
    /// Create a new AggregateFunction expression with a user-defined function (UDF)
672
0
    pub fn new_udf(
673
0
        func: Arc<crate::AggregateUDF>,
674
0
        args: Vec<Expr>,
675
0
        distinct: bool,
676
0
        filter: Option<Box<Expr>>,
677
0
        order_by: Option<Vec<Sort>>,
678
0
        null_treatment: Option<NullTreatment>,
679
0
    ) -> Self {
680
0
        Self {
681
0
            func,
682
0
            args,
683
0
            distinct,
684
0
            filter,
685
0
            order_by,
686
0
            null_treatment,
687
0
        }
688
0
    }
689
}
690
691
/// The function invoked by a window expression.
692
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
693
/// Defines which implementation of a window function DataFusion should call.
694
pub enum WindowFunctionDefinition {
695
    /// A built-in window function
696
    /// (one of the `BuiltInWindowFunction` variants)
697
    BuiltInWindowFunction(built_in_window_function::BuiltInWindowFunction),
698
    /// A user defined aggregate function, used as a window function
699
    AggregateUDF(Arc<crate::AggregateUDF>),
700
    /// A user defined window function
701
    WindowUDF(Arc<crate::WindowUDF>),
702
}
703
704
impl WindowFunctionDefinition {
705
    /// Returns the datatype of the window function
706
0
    pub fn return_type(
707
0
        &self,
708
0
        input_expr_types: &[DataType],
709
0
        _input_expr_nullable: &[bool],
710
0
        display_name: &str,
711
0
    ) -> Result<DataType> {
712
0
        match self {
713
0
            WindowFunctionDefinition::BuiltInWindowFunction(fun) => {
714
0
                fun.return_type(input_expr_types)
715
            }
716
0
            WindowFunctionDefinition::AggregateUDF(fun) => {
717
0
                fun.return_type(input_expr_types)
718
            }
719
0
            WindowFunctionDefinition::WindowUDF(fun) => fun
720
0
                .field(WindowUDFFieldArgs::new(input_expr_types, display_name))
721
0
                .map(|field| field.data_type().clone()),
722
        }
723
0
    }
724
725
    /// Returns the signature supported by this function.
726
0
    pub fn signature(&self) -> Signature {
727
0
        match self {
728
0
            WindowFunctionDefinition::BuiltInWindowFunction(fun) => fun.signature(),
729
0
            WindowFunctionDefinition::AggregateUDF(fun) => fun.signature().clone(),
730
0
            WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(),
731
        }
732
0
    }
733
734
    /// Function's name for display
735
0
    pub fn name(&self) -> &str {
736
0
        match self {
737
0
            WindowFunctionDefinition::BuiltInWindowFunction(fun) => fun.name(),
738
0
            WindowFunctionDefinition::WindowUDF(fun) => fun.name(),
739
0
            WindowFunctionDefinition::AggregateUDF(fun) => fun.name(),
740
        }
741
0
    }
742
}
743
744
impl fmt::Display for WindowFunctionDefinition {
745
1
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
746
1
        match self {
747
0
            WindowFunctionDefinition::BuiltInWindowFunction(fun) => {
748
0
                std::fmt::Display::fmt(fun, f)
749
            }
750
1
            WindowFunctionDefinition::AggregateUDF(fun) => std::fmt::Display::fmt(fun, f),
751
0
            WindowFunctionDefinition::WindowUDF(fun) => std::fmt::Display::fmt(fun, f),
752
        }
753
1
    }
754
}
755
756
impl From<BuiltInWindowFunction> for WindowFunctionDefinition {
757
0
    fn from(value: BuiltInWindowFunction) -> Self {
758
0
        Self::BuiltInWindowFunction(value)
759
0
    }
760
}
761
762
impl From<Arc<crate::AggregateUDF>> for WindowFunctionDefinition {
763
0
    fn from(value: Arc<crate::AggregateUDF>) -> Self {
764
0
        Self::AggregateUDF(value)
765
0
    }
766
}
767
768
impl From<Arc<WindowUDF>> for WindowFunctionDefinition {
769
0
    fn from(value: Arc<WindowUDF>) -> Self {
770
0
        Self::WindowUDF(value)
771
0
    }
772
}
773
774
/// Window function
775
///
776
/// Holds the actual function to call ([`WindowFunctionDefinition`]) as well as its
777
/// arguments (`args`) and the contents of the `OVER` clause:
778
///
779
/// 1. `PARTITION BY`
780
/// 2. `ORDER BY`
781
/// 3. Window frame (e.g. `ROWS 1 PRECEDING AND 1 FOLLOWING`)
782
///
783
/// # Example
784
/// ```
785
/// # use datafusion_expr::{Expr, BuiltInWindowFunction, col, ExprFunctionExt};
786
/// # use datafusion_expr::expr::WindowFunction;
787
/// // Create FIRST_VALUE(a) OVER (PARTITION BY b ORDER BY c)
788
/// let expr = Expr::WindowFunction(
789
///     WindowFunction::new(BuiltInWindowFunction::FirstValue, vec![col("a")])
790
/// )
791
///   .partition_by(vec![col("b")])
792
///   .order_by(vec![col("c").sort(true, true)])
793
///   .build()
794
///   .unwrap();
795
/// ```
796
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
797
pub struct WindowFunction {
798
    /// The function to call; see [`WindowFunctionDefinition`]
799
    pub fun: WindowFunctionDefinition,
800
    /// List of expressions to feed to the functions as arguments
801
    pub args: Vec<Expr>,
802
    /// List of partition by expressions
803
    pub partition_by: Vec<Expr>,
804
    /// List of order by expressions
805
    pub order_by: Vec<Sort>,
806
    /// Window frame
807
    pub window_frame: WindowFrame,
808
    /// Specifies how NULL value is treated: ignore or respect
809
    pub null_treatment: Option<NullTreatment>,
810
}
811
812
impl WindowFunction {
813
    /// Create a new Window expression with the specified arguments and an
814
    /// empty `OVER` clause
815
0
    pub fn new(fun: impl Into<WindowFunctionDefinition>, args: Vec<Expr>) -> Self {
816
0
        Self {
817
0
            fun: fun.into(),
818
0
            args,
819
0
            partition_by: Vec::default(),
820
0
            order_by: Vec::default(),
821
0
            window_frame: WindowFrame::new(None),
822
0
            null_treatment: None,
823
0
        }
824
0
    }
825
}
826
827
/// Find DataFusion's built-in window function by name (lowercased before lookup).
828
0
pub fn find_df_window_func(name: &str) -> Option<WindowFunctionDefinition> {
829
0
    let name = name.to_lowercase();
830
    // Code paths for window functions leveraging ordinary aggregators and
831
    // built-in window functions are quite different, and the same function
832
    // may have different implementations for these cases. Only built-in
833
    // window functions are looked up here: any name that does not parse as
834
    // one (including aggregate function names) yields `None`.
835
0
    if let Ok(built_in_function) =
836
0
        built_in_window_function::BuiltInWindowFunction::from_str(name.as_str())
837
    {
838
0
        Some(WindowFunctionDefinition::BuiltInWindowFunction(
839
0
            built_in_function,
840
0
        ))
841
    } else {
842
0
        None
843
    }
844
0
}
845
846
/// EXISTS expression
847
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
848
pub struct Exists {
849
    /// subquery that will produce a single column of data
850
    pub subquery: Subquery,
851
    /// Whether the expression is negated
852
    pub negated: bool,
853
}
854
855
impl Exists {
856
    /// Create a new Exists expression from a subquery and a negation flag.
857
0
    pub fn new(subquery: Subquery, negated: bool) -> Self {
858
0
        Self { subquery, negated }
859
0
    }
860
}
861
862
/// User Defined Aggregate Function
863
///
864
/// See [`udaf::AggregateUDF`] for more information.
865
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
866
pub struct AggregateUDF {
867
    /// The function
868
    pub fun: Arc<udaf::AggregateUDF>,
869
    /// List of expressions to feed to the functions as arguments
870
    pub args: Vec<Expr>,
871
    /// Optional filter
872
    pub filter: Option<Box<Expr>>,
873
    /// Optional ORDER BY applied prior to aggregating
874
    pub order_by: Option<Vec<Expr>>,
875
}
876
877
impl AggregateUDF {
878
    /// Create a new AggregateUDF expression
879
0
    pub fn new(
880
0
        fun: Arc<udaf::AggregateUDF>,
881
0
        args: Vec<Expr>,
882
0
        filter: Option<Box<Expr>>,
883
0
        order_by: Option<Vec<Expr>>,
884
0
    ) -> Self {
885
0
        Self {
886
0
            fun,
887
0
            args,
888
0
            filter,
889
0
            order_by,
890
0
        }
891
0
    }
892
}
893
894
/// InList expression
895
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
896
pub struct InList {
897
    /// The expression to compare
898
    pub expr: Box<Expr>,
899
    /// The list of values to compare against
900
    pub list: Vec<Expr>,
901
    /// Whether the expression is negated
902
    pub negated: bool,
903
}
904
905
impl InList {
906
    /// Create a new InList expression
907
0
    pub fn new(expr: Box<Expr>, list: Vec<Expr>, negated: bool) -> Self {
908
0
        Self {
909
0
            expr,
910
0
            list,
911
0
            negated,
912
0
        }
913
0
    }
914
}
915
916
/// IN subquery
917
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
918
pub struct InSubquery {
919
    /// The expression to compare
920
    pub expr: Box<Expr>,
921
    /// Subquery that will produce a single column of data to compare against
922
    pub subquery: Subquery,
923
    /// Whether the expression is negated
924
    pub negated: bool,
925
}
926
927
impl InSubquery {
928
    /// Create a new InSubquery expression
929
0
    pub fn new(expr: Box<Expr>, subquery: Subquery, negated: bool) -> Self {
930
0
        Self {
931
0
            expr,
932
0
            subquery,
933
0
            negated,
934
0
        }
935
0
    }
936
}
937
938
/// Placeholder, representing bind parameter values such as `$1` or `$name`.
939
///
940
/// The type of these parameters is inferred using [`Expr::infer_placeholder_types`]
941
/// or can be specified directly using `PREPARE` statements.
942
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
943
pub struct Placeholder {
944
    /// The identifier of the parameter, including the leading `$` (e.g., `"$1"` or `"$foo"`)
945
    pub id: String,
946
    /// The type the parameter will be filled in with
947
    pub data_type: Option<DataType>,
948
}
949
950
impl Placeholder {
951
    /// Create a new Placeholder expression
952
0
    pub fn new(id: String, data_type: Option<DataType>) -> Self {
953
0
        Self { id, data_type }
954
0
    }
955
}
956
957
/// Grouping sets
958
///
959
/// See <https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS>
960
/// for Postgres definition.
961
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
962
/// for Apache Spark definition.
963
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
964
pub enum GroupingSet {
965
    /// Rollup grouping sets
966
    Rollup(Vec<Expr>),
967
    /// Cube grouping sets
968
    Cube(Vec<Expr>),
969
    /// User-defined grouping sets
970
    GroupingSets(Vec<Vec<Expr>>),
971
}
972
973
impl GroupingSet {
974
    /// Return all distinct exprs in the grouping set. For `CUBE` and `ROLLUP` this
975
    /// is just the underlying list of exprs. For `GROUPING SETS` we need to deduplicate
976
    /// the exprs in the underlying sets.
977
0
    pub fn distinct_expr(&self) -> Vec<&Expr> {
978
0
        match self {
979
0
            GroupingSet::Rollup(exprs) | GroupingSet::Cube(exprs) => {
980
0
                exprs.iter().collect()
981
            }
982
0
            GroupingSet::GroupingSets(groups) => {
983
0
                let mut exprs: Vec<&Expr> = vec![];
984
0
                for exp in groups.iter().flatten() {
985
0
                    if !exprs.contains(&exp) {
986
0
                        exprs.push(exp);
987
0
                    }
988
                }
989
0
                exprs
990
            }
991
        }
992
0
    }
993
}
994
995
/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and Bigquery `EXCEPT`.
996
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
997
pub struct WildcardOptions {
998
    /// `[ILIKE...]`.
999
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1000
    pub ilike: Option<IlikeSelectItem>,
1001
    /// `[EXCLUDE...]`.
1002
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1003
    pub exclude: Option<ExcludeSelectItem>,
1004
    /// `[EXCEPT...]`.
1005
    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_except>
1006
    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#except>
1007
    pub except: Option<ExceptSelectItem>,
1008
    /// `[REPLACE]`
1009
    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace>
1010
    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#replace>
1011
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1012
    pub replace: Option<PlannedReplaceSelectItem>,
1013
    /// `[RENAME ...]`.
1014
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
1015
    pub rename: Option<RenameSelectItem>,
1016
}
1017
1018
impl WildcardOptions {
1019
0
    pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self {
1020
0
        WildcardOptions {
1021
0
            ilike: self.ilike,
1022
0
            exclude: self.exclude,
1023
0
            except: self.except,
1024
0
            replace: Some(replace),
1025
0
            rename: self.rename,
1026
0
        }
1027
0
    }
1028
}
1029
1030
impl Display for WildcardOptions {
1031
0
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1032
0
        if let Some(ilike) = &self.ilike {
1033
0
            write!(f, " {ilike}")?;
1034
0
        }
1035
0
        if let Some(exclude) = &self.exclude {
1036
0
            write!(f, " {exclude}")?;
1037
0
        }
1038
0
        if let Some(except) = &self.except {
1039
0
            write!(f, " {except}")?;
1040
0
        }
1041
0
        if let Some(replace) = &self.replace {
1042
0
            write!(f, " {replace}")?;
1043
0
        }
1044
0
        if let Some(rename) = &self.rename {
1045
0
            write!(f, " {rename}")?;
1046
0
        }
1047
0
        Ok(())
1048
0
    }
1049
}
1050
1051
/// The planned expressions for `REPLACE`
1052
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
1053
pub struct PlannedReplaceSelectItem {
1054
    /// The original ast nodes
1055
    pub items: Vec<ReplaceSelectElement>,
1056
    /// The expression planned from the ast nodes. They will be used when expanding the wildcard.
1057
    pub planned_expressions: Vec<Expr>,
1058
}
1059
1060
impl Display for PlannedReplaceSelectItem {
1061
0
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1062
0
        write!(f, "REPLACE")?;
1063
0
        write!(f, " ({})", display_comma_separated(&self.items))?;
1064
0
        Ok(())
1065
0
    }
1066
}
1067
1068
impl PlannedReplaceSelectItem {
1069
0
    pub fn items(&self) -> &[ReplaceSelectElement] {
1070
0
        &self.items
1071
0
    }
1072
1073
0
    pub fn expressions(&self) -> &[Expr] {
1074
0
        &self.planned_expressions
1075
0
    }
1076
}
1077
1078
impl Expr {
1079
    #[deprecated(since = "40.0.0", note = "use schema_name instead")]
1080
0
    pub fn display_name(&self) -> Result<String> {
1081
0
        Ok(self.schema_name().to_string())
1082
0
    }
1083
1084
    /// The name of the column (field) that this `Expr` will produce.
1085
    ///
1086
    /// For example, for a projection (e.g. `SELECT <expr>`) the resulting arrow
1087
    /// [`Schema`] will have a field with this name.
1088
    ///
1089
    /// Note that the resulting string is subtly different from the `Display`
1090
    /// representation for certain `Expr`. Some differences:
1091
    ///
1092
    /// 1. [`Expr::Alias`], which shows only the alias itself
1093
    /// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression
1094
    ///
1095
    /// # Example
1096
    /// ```
1097
    /// # use datafusion_expr::{col, lit};
1098
    /// let expr = col("foo").eq(lit(42));
1099
    /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string());
1100
    ///
1101
    /// let expr = col("foo").alias("bar").eq(lit(11));
1102
    /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string());
1103
    /// ```
1104
    ///
1105
    /// [`Schema`]: arrow::datatypes::Schema
1106
0
    pub fn schema_name(&self) -> impl Display + '_ {
1107
0
        SchemaDisplay(self)
1108
0
    }
1109
1110
    /// Returns the qualifier and the schema name of this expression.
1111
    ///
1112
    /// Used when the expression forms the output field of a certain plan.
1113
    /// The result is the field's qualifier and field name in the plan's
1114
    /// output schema. We can use this qualified name to reference the field.
1115
0
    pub fn qualified_name(&self) -> (Option<TableReference>, String) {
1116
0
        match self {
1117
0
            Expr::Column(Column { relation, name }) => (relation.clone(), name.clone()),
1118
0
            Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()),
1119
0
            _ => (None, self.schema_name().to_string()),
1120
        }
1121
0
    }
1122
1123
    /// Returns a full and complete string representation of this expression.
1124
    #[deprecated(note = "use format! instead")]
1125
0
    pub fn canonical_name(&self) -> String {
1126
0
        format!("{self}")
1127
0
    }
1128
1129
    /// Return String representation of the variant represented by `self`
1130
    /// Useful for non-rust based bindings
1131
0
    pub fn variant_name(&self) -> &str {
1132
0
        match self {
1133
0
            Expr::AggregateFunction { .. } => "AggregateFunction",
1134
0
            Expr::Alias(..) => "Alias",
1135
0
            Expr::Between { .. } => "Between",
1136
0
            Expr::BinaryExpr { .. } => "BinaryExpr",
1137
0
            Expr::Case { .. } => "Case",
1138
0
            Expr::Cast { .. } => "Cast",
1139
0
            Expr::Column(..) => "Column",
1140
0
            Expr::OuterReferenceColumn(_, _) => "Outer",
1141
0
            Expr::Exists { .. } => "Exists",
1142
0
            Expr::GroupingSet(..) => "GroupingSet",
1143
0
            Expr::InList { .. } => "InList",
1144
0
            Expr::InSubquery(..) => "InSubquery",
1145
0
            Expr::IsNotNull(..) => "IsNotNull",
1146
0
            Expr::IsNull(..) => "IsNull",
1147
0
            Expr::Like { .. } => "Like",
1148
0
            Expr::SimilarTo { .. } => "RLike",
1149
0
            Expr::IsTrue(..) => "IsTrue",
1150
0
            Expr::IsFalse(..) => "IsFalse",
1151
0
            Expr::IsUnknown(..) => "IsUnknown",
1152
0
            Expr::IsNotTrue(..) => "IsNotTrue",
1153
0
            Expr::IsNotFalse(..) => "IsNotFalse",
1154
0
            Expr::IsNotUnknown(..) => "IsNotUnknown",
1155
0
            Expr::Literal(..) => "Literal",
1156
0
            Expr::Negative(..) => "Negative",
1157
0
            Expr::Not(..) => "Not",
1158
0
            Expr::Placeholder(_) => "Placeholder",
1159
0
            Expr::ScalarFunction(..) => "ScalarFunction",
1160
0
            Expr::ScalarSubquery { .. } => "ScalarSubquery",
1161
0
            Expr::ScalarVariable(..) => "ScalarVariable",
1162
0
            Expr::TryCast { .. } => "TryCast",
1163
0
            Expr::WindowFunction { .. } => "WindowFunction",
1164
0
            Expr::Wildcard { .. } => "Wildcard",
1165
0
            Expr::Unnest { .. } => "Unnest",
1166
        }
1167
0
    }
1168
1169
    /// Return `self == other`
1170
0
    pub fn eq(self, other: Expr) -> Expr {
1171
0
        binary_expr(self, Operator::Eq, other)
1172
0
    }
1173
1174
    /// Return `self != other`
1175
0
    pub fn not_eq(self, other: Expr) -> Expr {
1176
0
        binary_expr(self, Operator::NotEq, other)
1177
0
    }
1178
1179
    /// Return `self > other`
1180
0
    pub fn gt(self, other: Expr) -> Expr {
1181
0
        binary_expr(self, Operator::Gt, other)
1182
0
    }
1183
1184
    /// Return `self >= other`
1185
0
    pub fn gt_eq(self, other: Expr) -> Expr {
1186
0
        binary_expr(self, Operator::GtEq, other)
1187
0
    }
1188
1189
    /// Return `self < other`
1190
0
    pub fn lt(self, other: Expr) -> Expr {
1191
0
        binary_expr(self, Operator::Lt, other)
1192
0
    }
1193
1194
    /// Return `self <= other`
1195
0
    pub fn lt_eq(self, other: Expr) -> Expr {
1196
0
        binary_expr(self, Operator::LtEq, other)
1197
0
    }
1198
1199
    /// Return `self AND other`
1200
0
    pub fn and(self, other: Expr) -> Expr {
1201
0
        binary_expr(self, Operator::And, other)
1202
0
    }
1203
1204
    /// Return `self OR other`
1205
0
    pub fn or(self, other: Expr) -> Expr {
1206
0
        binary_expr(self, Operator::Or, other)
1207
0
    }
1208
1209
    /// Return `self LIKE other`
1210
0
    pub fn like(self, other: Expr) -> Expr {
1211
0
        Expr::Like(Like::new(
1212
0
            false,
1213
0
            Box::new(self),
1214
0
            Box::new(other),
1215
0
            None,
1216
0
            false,
1217
0
        ))
1218
0
    }
1219
1220
    /// Return `self NOT LIKE other`
1221
0
    pub fn not_like(self, other: Expr) -> Expr {
1222
0
        Expr::Like(Like::new(
1223
0
            true,
1224
0
            Box::new(self),
1225
0
            Box::new(other),
1226
0
            None,
1227
0
            false,
1228
0
        ))
1229
0
    }
1230
1231
    /// Return `self ILIKE other`
1232
0
    pub fn ilike(self, other: Expr) -> Expr {
1233
0
        Expr::Like(Like::new(
1234
0
            false,
1235
0
            Box::new(self),
1236
0
            Box::new(other),
1237
0
            None,
1238
0
            true,
1239
0
        ))
1240
0
    }
1241
1242
    /// Return `self NOT ILIKE other`
1243
0
    pub fn not_ilike(self, other: Expr) -> Expr {
1244
0
        Expr::Like(Like::new(true, Box::new(self), Box::new(other), None, true))
1245
0
    }
1246
1247
    /// Return the name to use for the specific Expr
1248
0
    pub fn name_for_alias(&self) -> Result<String> {
1249
0
        Ok(self.schema_name().to_string())
1250
0
    }
1251
1252
    /// Ensure `expr` has the name as `original_name` by adding an
1253
    /// alias if necessary.
1254
0
    pub fn alias_if_changed(self, original_name: String) -> Result<Expr> {
1255
0
        let new_name = self.name_for_alias()?;
1256
0
        if new_name == original_name {
1257
0
            return Ok(self);
1258
0
        }
1259
0
1260
0
        Ok(self.alias(original_name))
1261
0
    }
1262
1263
    /// Return `self AS name` alias expression
1264
0
    pub fn alias(self, name: impl Into<String>) -> Expr {
1265
0
        Expr::Alias(Alias::new(self, None::<&str>, name.into()))
1266
0
    }
1267
1268
    /// Return `self AS name` alias expression with a specific qualifier
1269
0
    pub fn alias_qualified(
1270
0
        self,
1271
0
        relation: Option<impl Into<TableReference>>,
1272
0
        name: impl Into<String>,
1273
0
    ) -> Expr {
1274
0
        Expr::Alias(Alias::new(self, relation, name.into()))
1275
0
    }
1276
1277
    /// Remove an alias from an expression if one exists.
1278
    ///
1279
    /// If the expression is not an alias, the expression is returned unchanged.
1280
    /// This method does not remove aliases from nested expressions.
1281
    ///
1282
    /// # Example
1283
    /// ```
1284
    /// # use datafusion_expr::col;
1285
    /// // `foo as "bar"` is unaliased to `foo`
1286
    /// let expr = col("foo").alias("bar");
1287
    /// assert_eq!(expr.unalias(), col("foo"));
1288
    ///
1289
    /// // `foo as "bar" + baz` is not unaliased
1290
    /// let expr = col("foo").alias("bar") + col("baz");
1291
    /// assert_eq!(expr.clone().unalias(), expr);
1292
    ///
1293
    /// // `foo as "bar" as "baz"` is unaliased to `foo as "bar"`
1294
    /// let expr = col("foo").alias("bar").alias("baz");
1295
    /// assert_eq!(expr.unalias(), col("foo").alias("bar"));
1296
    /// ```
1297
0
    pub fn unalias(self) -> Expr {
1298
0
        match self {
1299
0
            Expr::Alias(alias) => *alias.expr,
1300
0
            _ => self,
1301
        }
1302
0
    }
1303
1304
    /// Recursively removes potentially multiple aliases from an expression.
1305
    ///
1306
    /// This method removes nested aliases and returns [`Transformed`]
1307
    /// to signal if the expression was changed.
1308
    ///
1309
    /// # Example
1310
    /// ```
1311
    /// # use datafusion_expr::col;
1312
    /// // `foo as "bar"` is unaliased to `foo`
1313
    /// let expr = col("foo").alias("bar");
1314
    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1315
    ///
1316
    /// // `foo as "bar" + baz` is unaliased to `foo + baz`
1317
    /// let expr = col("foo").alias("bar") + col("baz");
1318
    /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz"));
1319
    ///
1320
    /// // `foo as "bar" as "baz"` is unaliased to `foo`
1321
    /// let expr = col("foo").alias("bar").alias("baz");
1322
    /// assert_eq!(expr.unalias_nested().data, col("foo"));
1323
    /// ```
1324
0
    pub fn unalias_nested(self) -> Transformed<Expr> {
1325
0
        self.transform_down_up(
1326
0
            |expr| {
1327
                // f_down: skip subqueries.  Check in f_down to avoid recursing into them
1328
0
                let recursion = if matches!(
1329
0
                    expr,
1330
                    Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_)
1331
                ) {
1332
                    // subqueries could contain aliases so don't recurse into those
1333
0
                    TreeNodeRecursion::Jump
1334
                } else {
1335
0
                    TreeNodeRecursion::Continue
1336
                };
1337
0
                Ok(Transformed::new(expr, false, recursion))
1338
0
            },
1339
0
            |expr| {
1340
                // f_up: unalias on up so we can remove nested aliases like
1341
                // `(x as foo) as bar`
1342
0
                if let Expr::Alias(Alias { expr, .. }) = expr {
1343
0
                    Ok(Transformed::yes(*expr))
1344
                } else {
1345
0
                    Ok(Transformed::no(expr))
1346
                }
1347
0
            },
1348
0
        )
1349
0
        // unreachable code: internal closure doesn't return err
1350
0
        .unwrap()
1351
0
    }
1352
1353
    /// Return `self IN <list>` if `negated` is false, otherwise
1354
    /// return `self NOT IN <list>`.
1355
0
    pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr {
1356
0
        Expr::InList(InList::new(Box::new(self), list, negated))
1357
0
    }
1358
1359
    /// Return `IsNull(Box(self))`
1360
0
    pub fn is_null(self) -> Expr {
1361
0
        Expr::IsNull(Box::new(self))
1362
0
    }
1363
1364
    /// Return `IsNotNull(Box(self))`
1365
0
    pub fn is_not_null(self) -> Expr {
1366
0
        Expr::IsNotNull(Box::new(self))
1367
0
    }
1368
1369
    /// Create a sort configuration from an existing expression.
1370
    ///
1371
    /// ```
1372
    /// # use datafusion_expr::col;
1373
    /// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST
1374
    /// ```
1375
0
    pub fn sort(self, asc: bool, nulls_first: bool) -> Sort {
1376
0
        Sort::new(self, asc, nulls_first)
1377
0
    }
1378
1379
    /// Return `IsTrue(Box(self))`
1380
0
    pub fn is_true(self) -> Expr {
1381
0
        Expr::IsTrue(Box::new(self))
1382
0
    }
1383
1384
    /// Return `IsNotTrue(Box(self))`
1385
0
    pub fn is_not_true(self) -> Expr {
1386
0
        Expr::IsNotTrue(Box::new(self))
1387
0
    }
1388
1389
    /// Return `IsFalse(Box(self))`
1390
0
    pub fn is_false(self) -> Expr {
1391
0
        Expr::IsFalse(Box::new(self))
1392
0
    }
1393
1394
    /// Return `IsNotFalse(Box(self))`
1395
0
    pub fn is_not_false(self) -> Expr {
1396
0
        Expr::IsNotFalse(Box::new(self))
1397
0
    }
1398
1399
    /// Return `IsUnknown(Box(self))`
1400
0
    pub fn is_unknown(self) -> Expr {
1401
0
        Expr::IsUnknown(Box::new(self))
1402
0
    }
1403
1404
    /// Return `IsNotUnknown(Box(self))`
1405
0
    pub fn is_not_unknown(self) -> Expr {
1406
0
        Expr::IsNotUnknown(Box::new(self))
1407
0
    }
1408
1409
    /// return `self BETWEEN low AND high`
1410
0
    pub fn between(self, low: Expr, high: Expr) -> Expr {
1411
0
        Expr::Between(Between::new(
1412
0
            Box::new(self),
1413
0
            false,
1414
0
            Box::new(low),
1415
0
            Box::new(high),
1416
0
        ))
1417
0
    }
1418
1419
    /// return `self NOT BETWEEN low AND high`
1420
0
    pub fn not_between(self, low: Expr, high: Expr) -> Expr {
1421
0
        Expr::Between(Between::new(
1422
0
            Box::new(self),
1423
0
            true,
1424
0
            Box::new(low),
1425
0
            Box::new(high),
1426
0
        ))
1427
0
    }
1428
1429
    #[deprecated(since = "39.0.0", note = "use try_as_col instead")]
1430
0
    pub fn try_into_col(&self) -> Result<Column> {
1431
0
        match self {
1432
0
            Expr::Column(it) => Ok(it.clone()),
1433
0
            _ => plan_err!("Could not coerce '{self}' into Column!"),
1434
        }
1435
0
    }
1436
1437
    /// Return a reference to the inner `Column` if any
1438
    ///
1439
    /// returns `None` if the expression is not a `Column`
1440
    ///
1441
    /// Note: None may be returned for expressions that are not `Column` but
1442
    /// are convertible to `Column` such as `Cast` expressions.
1443
    ///
1444
    /// Example
1445
    /// ```
1446
    /// # use datafusion_common::Column;
1447
    /// use datafusion_expr::{col, Expr};
1448
    /// let expr = col("foo");
1449
    /// assert_eq!(expr.try_as_col(), Some(&Column::from("foo")));
1450
    ///
1451
    /// let expr = col("foo").alias("bar");
1452
    /// assert_eq!(expr.try_as_col(), None);
1453
    /// ```
1454
0
    pub fn try_as_col(&self) -> Option<&Column> {
1455
0
        if let Expr::Column(it) = self {
1456
0
            Some(it)
1457
        } else {
1458
0
            None
1459
        }
1460
0
    }
1461
1462
    /// Returns the inner `Column` if any. This is a specialized version of
1463
    /// [`Self::try_as_col`] that takes `Cast` expressions into account when the
1464
    /// expression is used as an `ON` condition for joins.
1465
    ///
1466
    /// Call this method when you are sure that the expression is a `Column`
1467
    /// or a `Cast` expression that wraps a `Column`.
1468
0
    pub fn get_as_join_column(&self) -> Option<&Column> {
1469
0
        match self {
1470
0
            Expr::Column(c) => Some(c),
1471
0
            Expr::Cast(Cast { expr, .. }) => match &**expr {
1472
0
                Expr::Column(c) => Some(c),
1473
0
                _ => None,
1474
            },
1475
0
            _ => None,
1476
        }
1477
0
    }
1478
1479
    /// Return all referenced columns of this expression.
1480
    #[deprecated(since = "40.0.0", note = "use Expr::column_refs instead")]
1481
0
    pub fn to_columns(&self) -> Result<HashSet<Column>> {
1482
0
        let mut using_columns = HashSet::new();
1483
0
        expr_to_columns(self, &mut using_columns)?;
1484
1485
0
        Ok(using_columns)
1486
0
    }
1487
1488
    /// Return all references to columns in this expression.
1489
    ///
1490
    /// # Example
1491
    /// ```
1492
    /// # use std::collections::HashSet;
1493
    /// # use datafusion_common::Column;
1494
    /// # use datafusion_expr::col;
1495
    /// // For an expression `a + (b * a)`
1496
    /// let expr = col("a") + (col("b") * col("a"));
1497
    /// let refs = expr.column_refs();
1498
    /// // refs contains "a" and "b"
1499
    /// assert_eq!(refs.len(), 2);
1500
    /// assert!(refs.contains(&Column::new_unqualified("a")));
1501
    /// assert!(refs.contains(&Column::new_unqualified("b")));
1502
    /// ```
1503
0
    pub fn column_refs(&self) -> HashSet<&Column> {
1504
0
        let mut using_columns = HashSet::new();
1505
0
        self.add_column_refs(&mut using_columns);
1506
0
        using_columns
1507
0
    }
1508
1509
    /// Adds references to all columns in this expression to the set
1510
    ///
1511
    /// See [`Self::column_refs`] for details
1512
0
    pub fn add_column_refs<'a>(&'a self, set: &mut HashSet<&'a Column>) {
1513
0
        self.apply(|expr| {
1514
0
            if let Expr::Column(col) = expr {
1515
0
                set.insert(col);
1516
0
            }
1517
0
            Ok(TreeNodeRecursion::Continue)
1518
0
        })
1519
0
        .expect("traversal is infallible");
1520
0
    }
1521
1522
    /// Return all references to columns and their occurrence counts in the expression.
1523
    ///
1524
    /// # Example
1525
    /// ```
1526
    /// # use std::collections::HashMap;
1527
    /// # use datafusion_common::Column;
1528
    /// # use datafusion_expr::col;
1529
    /// // For an expression `a + (b * a)`
1530
    /// let expr = col("a") + (col("b") * col("a"));
1531
    /// let mut refs = expr.column_refs_counts();
1532
    /// // refs contains "a" and "b"
1533
    /// assert_eq!(refs.len(), 2);
1534
    /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2);
1535
    /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1);
1536
    /// ```
1537
0
    pub fn column_refs_counts(&self) -> HashMap<&Column, usize> {
1538
0
        let mut map = HashMap::new();
1539
0
        self.add_column_ref_counts(&mut map);
1540
0
        map
1541
0
    }
1542
1543
    /// Adds references to all columns and their occurrence counts in the expression to
1544
    /// the map.
1545
    ///
1546
    /// See [`Self::column_refs_counts`] for details
1547
0
    pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) {
1548
0
        self.apply(|expr| {
1549
0
            if let Expr::Column(col) = expr {
1550
0
                *map.entry(col).or_default() += 1;
1551
0
            }
1552
0
            Ok(TreeNodeRecursion::Continue)
1553
0
        })
1554
0
        .expect("traversal is infallible");
1555
0
    }
1556
1557
    /// Returns true if there are any column references in this Expr
1558
0
    pub fn any_column_refs(&self) -> bool {
1559
0
        self.exists(|expr| Ok(matches!(expr, Expr::Column(_))))
1560
0
            .unwrap()
1561
0
    }
1562
1563
    /// Return true when the expression contains outer reference (correlated) expressions.
1564
0
    pub fn contains_outer(&self) -> bool {
1565
0
        self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { .. })))
1566
0
            .unwrap()
1567
0
    }
1568
1569
    /// Returns true if the expression node is volatile, i.e. whether it can return
1570
    /// different results when evaluated multiple times with the same input.
1571
    /// Note: unlike [`Self::is_volatile`], this function does not consider inputs:
1572
    /// - `rand()` returns `true`,
1573
    /// - `a + rand()` returns `false`
1574
0
    pub fn is_volatile_node(&self) -> bool {
1575
0
        matches!(self, Expr::ScalarFunction(func) if func.func.signature().volatility == Volatility::Volatile)
1576
0
    }
1577
1578
    /// Returns true if the expression is volatile, i.e. whether it can return different
1579
    /// results when evaluated multiple times with the same input.
1580
0
    pub fn is_volatile(&self) -> Result<bool> {
1581
0
        self.exists(|expr| Ok(expr.is_volatile_node()))
1582
0
    }
1583
1584
    /// Recursively find all [`Expr::Placeholder`] expressions, and
1585
    /// infer their [`DataType`] from the context of their use.
1586
    ///
1587
    /// For example, given an expression like `<int32> = $0`, this will infer `$0` to
1588
    /// have type `int32`.
1589
0
    pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<Expr> {
1590
0
        self.transform(|mut expr| {
1591
            // Default to assuming the arguments are the same type
1592
0
            if let Expr::BinaryExpr(BinaryExpr { left, op: _, right }) = &mut expr {
1593
0
                rewrite_placeholder(left.as_mut(), right.as_ref(), schema)?;
1594
0
                rewrite_placeholder(right.as_mut(), left.as_ref(), schema)?;
1595
0
            };
1596
            if let Expr::Between(Between {
1597
0
                expr,
1598
0
                negated: _,
1599
0
                low,
1600
0
                high,
1601
0
            }) = &mut expr
1602
            {
1603
0
                rewrite_placeholder(low.as_mut(), expr.as_ref(), schema)?;
1604
0
                rewrite_placeholder(high.as_mut(), expr.as_ref(), schema)?;
1605
0
            }
1606
0
            Ok(Transformed::yes(expr))
1607
0
        })
1608
0
        .data()
1609
0
    }
1610
1611
    /// Returns true if some of this `Expr`'s subexpressions may not be evaluated
1612
    /// and thus any side effects (like divide by zero) may not be encountered
1613
0
    pub fn short_circuits(&self) -> bool {
1614
0
        match self {
1615
0
            Expr::ScalarFunction(ScalarFunction { func, .. }) => func.short_circuits(),
1616
0
            Expr::BinaryExpr(BinaryExpr { op, .. }) => {
1617
0
                matches!(op, Operator::And | Operator::Or)
1618
            }
1619
0
            Expr::Case { .. } => true,
1620
            // Use explicit pattern match instead of a default
1621
            // implementation, so that in the future if someone adds
1622
            // new Expr types, they will check here as well
1623
            Expr::AggregateFunction(..)
1624
            | Expr::Alias(..)
1625
            | Expr::Between(..)
1626
            | Expr::Cast(..)
1627
            | Expr::Column(..)
1628
            | Expr::Exists(..)
1629
            | Expr::GroupingSet(..)
1630
            | Expr::InList(..)
1631
            | Expr::InSubquery(..)
1632
            | Expr::IsFalse(..)
1633
            | Expr::IsNotFalse(..)
1634
            | Expr::IsNotNull(..)
1635
            | Expr::IsNotTrue(..)
1636
            | Expr::IsNotUnknown(..)
1637
            | Expr::IsNull(..)
1638
            | Expr::IsTrue(..)
1639
            | Expr::IsUnknown(..)
1640
            | Expr::Like(..)
1641
            | Expr::ScalarSubquery(..)
1642
            | Expr::ScalarVariable(_, _)
1643
            | Expr::SimilarTo(..)
1644
            | Expr::Not(..)
1645
            | Expr::Negative(..)
1646
            | Expr::OuterReferenceColumn(_, _)
1647
            | Expr::TryCast(..)
1648
            | Expr::Unnest(..)
1649
            | Expr::Wildcard { .. }
1650
            | Expr::WindowFunction(..)
1651
            | Expr::Literal(..)
1652
0
            | Expr::Placeholder(..) => false,
1653
        }
1654
0
    }
1655
1656
    /// Hashes the direct content of an `Expr` without recursing into its children.
1657
    ///
1658
    /// This method is useful to incrementally compute hashes, such as in
1659
    /// `CommonSubexprEliminate` which builds a deep hash of a node and its descendants
1660
    /// during the bottom-up phase of the first traversal and so avoid computing the hash
1661
    /// of the node and then the hash of its descendants separately.
1662
    ///
1663
    /// If a node doesn't have any children then this method is similar to `.hash()`, but
1664
    /// not necessarily returns the same value.
1665
    ///
1666
    /// As it is pretty easy to forget changing this method when `Expr` changes the
1667
    /// implementation doesn't use wildcard patterns (`..`, `_`) to catch changes
1668
    /// compile time.
1669
0
    pub fn hash_node<H: Hasher>(&self, hasher: &mut H) {
1670
0
        mem::discriminant(self).hash(hasher);
1671
0
        match self {
1672
            Expr::Alias(Alias {
1673
0
                expr: _expr,
1674
0
                relation,
1675
0
                name,
1676
0
            }) => {
1677
0
                relation.hash(hasher);
1678
0
                name.hash(hasher);
1679
0
            }
1680
0
            Expr::Column(column) => {
1681
0
                column.hash(hasher);
1682
0
            }
1683
0
            Expr::ScalarVariable(data_type, name) => {
1684
0
                data_type.hash(hasher);
1685
0
                name.hash(hasher);
1686
0
            }
1687
0
            Expr::Literal(scalar_value) => {
1688
0
                scalar_value.hash(hasher);
1689
0
            }
1690
            Expr::BinaryExpr(BinaryExpr {
1691
0
                left: _left,
1692
0
                op,
1693
0
                right: _right,
1694
0
            }) => {
1695
0
                op.hash(hasher);
1696
0
            }
1697
            Expr::Like(Like {
1698
0
                negated,
1699
0
                expr: _expr,
1700
0
                pattern: _pattern,
1701
0
                escape_char,
1702
0
                case_insensitive,
1703
            })
1704
            | Expr::SimilarTo(Like {
1705
0
                negated,
1706
0
                expr: _expr,
1707
0
                pattern: _pattern,
1708
0
                escape_char,
1709
0
                case_insensitive,
1710
0
            }) => {
1711
0
                negated.hash(hasher);
1712
0
                escape_char.hash(hasher);
1713
0
                case_insensitive.hash(hasher);
1714
0
            }
1715
0
            Expr::Not(_expr)
1716
0
            | Expr::IsNotNull(_expr)
1717
0
            | Expr::IsNull(_expr)
1718
0
            | Expr::IsTrue(_expr)
1719
0
            | Expr::IsFalse(_expr)
1720
0
            | Expr::IsUnknown(_expr)
1721
0
            | Expr::IsNotTrue(_expr)
1722
0
            | Expr::IsNotFalse(_expr)
1723
0
            | Expr::IsNotUnknown(_expr)
1724
0
            | Expr::Negative(_expr) => {}
1725
            Expr::Between(Between {
1726
0
                expr: _expr,
1727
0
                negated,
1728
0
                low: _low,
1729
0
                high: _high,
1730
0
            }) => {
1731
0
                negated.hash(hasher);
1732
0
            }
1733
            Expr::Case(Case {
1734
0
                expr: _expr,
1735
0
                when_then_expr: _when_then_expr,
1736
0
                else_expr: _else_expr,
1737
0
            }) => {}
1738
            Expr::Cast(Cast {
1739
0
                expr: _expr,
1740
0
                data_type,
1741
            })
1742
            | Expr::TryCast(TryCast {
1743
0
                expr: _expr,
1744
0
                data_type,
1745
0
            }) => {
1746
0
                data_type.hash(hasher);
1747
0
            }
1748
0
            Expr::ScalarFunction(ScalarFunction { func, args: _args }) => {
1749
0
                func.hash(hasher);
1750
0
            }
1751
            Expr::AggregateFunction(AggregateFunction {
1752
0
                func,
1753
0
                args: _args,
1754
0
                distinct,
1755
0
                filter: _filter,
1756
0
                order_by: _order_by,
1757
0
                null_treatment,
1758
0
            }) => {
1759
0
                func.hash(hasher);
1760
0
                distinct.hash(hasher);
1761
0
                null_treatment.hash(hasher);
1762
0
            }
1763
            Expr::WindowFunction(WindowFunction {
1764
0
                fun,
1765
0
                args: _args,
1766
0
                partition_by: _partition_by,
1767
0
                order_by: _order_by,
1768
0
                window_frame,
1769
0
                null_treatment,
1770
0
            }) => {
1771
0
                fun.hash(hasher);
1772
0
                window_frame.hash(hasher);
1773
0
                null_treatment.hash(hasher);
1774
0
            }
1775
            Expr::InList(InList {
1776
0
                expr: _expr,
1777
0
                list: _list,
1778
0
                negated,
1779
0
            }) => {
1780
0
                negated.hash(hasher);
1781
0
            }
1782
0
            Expr::Exists(Exists { subquery, negated }) => {
1783
0
                subquery.hash(hasher);
1784
0
                negated.hash(hasher);
1785
0
            }
1786
            Expr::InSubquery(InSubquery {
1787
0
                expr: _expr,
1788
0
                subquery,
1789
0
                negated,
1790
0
            }) => {
1791
0
                subquery.hash(hasher);
1792
0
                negated.hash(hasher);
1793
0
            }
1794
0
            Expr::ScalarSubquery(subquery) => {
1795
0
                subquery.hash(hasher);
1796
0
            }
1797
0
            Expr::Wildcard { qualifier, options } => {
1798
0
                qualifier.hash(hasher);
1799
0
                options.hash(hasher);
1800
0
            }
1801
0
            Expr::GroupingSet(grouping_set) => {
1802
0
                mem::discriminant(grouping_set).hash(hasher);
1803
0
                match grouping_set {
1804
0
                    GroupingSet::Rollup(_exprs) | GroupingSet::Cube(_exprs) => {}
1805
0
                    GroupingSet::GroupingSets(_exprs) => {}
1806
                }
1807
            }
1808
0
            Expr::Placeholder(place_holder) => {
1809
0
                place_holder.hash(hasher);
1810
0
            }
1811
0
            Expr::OuterReferenceColumn(data_type, column) => {
1812
0
                data_type.hash(hasher);
1813
0
                column.hash(hasher);
1814
0
            }
1815
0
            Expr::Unnest(Unnest { expr: _expr }) => {}
1816
        };
1817
0
    }
1818
}
1819
1820
// modifies expr if it is a placeholder with datatype of right
1821
0
fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> {
1822
0
    if let Expr::Placeholder(Placeholder { id: _, data_type }) = expr {
1823
0
        if data_type.is_none() {
1824
0
            let other_dt = other.get_type(schema);
1825
0
            match other_dt {
1826
0
                Err(e) => {
1827
0
                    Err(e.context(format!(
1828
0
                        "Can not find type of {other} needed to infer type of {expr}"
1829
0
                    )))?;
1830
                }
1831
0
                Ok(dt) => {
1832
0
                    *data_type = Some(dt);
1833
0
                }
1834
            }
1835
0
        };
1836
0
    }
1837
0
    Ok(())
1838
0
}
1839
1840
/// Renders every item yielded by `$ARRAY.iter()` with its `Display`
/// implementation and joins the pieces with `", "`.
#[macro_export]
macro_rules! expr_vec_fmt {
    ( $ARRAY:expr ) => {{
        let rendered: Vec<String> =
            $ARRAY.iter().map(|item| format!("{item}")).collect();
        rendered.join(", ")
    }};
}
1850
1851
struct SchemaDisplay<'a>(&'a Expr);
1852
impl<'a> Display for SchemaDisplay<'a> {
1853
0
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1854
0
        match self.0 {
1855
            // The same as Display
1856
            Expr::Column(_)
1857
            | Expr::Literal(_)
1858
            | Expr::ScalarVariable(..)
1859
            | Expr::OuterReferenceColumn(..)
1860
            | Expr::Placeholder(_)
1861
0
            | Expr::Wildcard { .. } => write!(f, "{}", self.0),
1862
1863
            Expr::AggregateFunction(AggregateFunction {
1864
0
                func,
1865
0
                args,
1866
0
                distinct,
1867
0
                filter,
1868
0
                order_by,
1869
0
                null_treatment,
1870
0
            }) => {
1871
0
                write!(
1872
0
                    f,
1873
0
                    "{}({}{})",
1874
0
                    func.name(),
1875
0
                    if *distinct { "DISTINCT " } else { "" },
1876
0
                    schema_name_from_exprs_comma_seperated_without_space(args)?
1877
0
                )?;
1878
1879
0
                if let Some(null_treatment) = null_treatment {
1880
0
                    write!(f, " {}", null_treatment)?;
1881
0
                }
1882
1883
0
                if let Some(filter) = filter {
1884
0
                    write!(f, " FILTER (WHERE {filter})")?;
1885
0
                };
1886
1887
0
                if let Some(order_by) = order_by {
1888
0
                    write!(f, " ORDER BY [{}]", schema_name_from_sorts(order_by)?)?;
1889
0
                };
1890
1891
0
                Ok(())
1892
            }
1893
            // expr is not shown since it is aliased
1894
0
            Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
1895
            Expr::Between(Between {
1896
0
                expr,
1897
0
                negated,
1898
0
                low,
1899
0
                high,
1900
0
            }) => {
1901
0
                if *negated {
1902
0
                    write!(
1903
0
                        f,
1904
0
                        "{} NOT BETWEEN {} AND {}",
1905
0
                        SchemaDisplay(expr),
1906
0
                        SchemaDisplay(low),
1907
0
                        SchemaDisplay(high),
1908
0
                    )
1909
                } else {
1910
0
                    write!(
1911
0
                        f,
1912
0
                        "{} BETWEEN {} AND {}",
1913
0
                        SchemaDisplay(expr),
1914
0
                        SchemaDisplay(low),
1915
0
                        SchemaDisplay(high),
1916
0
                    )
1917
                }
1918
            }
1919
0
            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
1920
0
                write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),)
1921
            }
1922
            Expr::Case(Case {
1923
0
                expr,
1924
0
                when_then_expr,
1925
0
                else_expr,
1926
0
            }) => {
1927
0
                write!(f, "CASE ")?;
1928
1929
0
                if let Some(e) = expr {
1930
0
                    write!(f, "{} ", SchemaDisplay(e))?;
1931
0
                }
1932
1933
0
                for (when, then) in when_then_expr {
1934
0
                    write!(
1935
0
                        f,
1936
0
                        "WHEN {} THEN {} ",
1937
0
                        SchemaDisplay(when),
1938
0
                        SchemaDisplay(then),
1939
0
                    )?;
1940
                }
1941
1942
0
                if let Some(e) = else_expr {
1943
0
                    write!(f, "ELSE {} ", SchemaDisplay(e))?;
1944
0
                }
1945
1946
0
                write!(f, "END")
1947
            }
1948
            // cast expr is not shown to be consistant with Postgres and Spark <https://github.com/apache/datafusion/pull/3222>
1949
0
            Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
1950
0
                write!(f, "{}", SchemaDisplay(expr))
1951
            }
1952
            Expr::InList(InList {
1953
0
                expr,
1954
0
                list,
1955
0
                negated,
1956
            }) => {
1957
0
                let inlist_name = schema_name_from_exprs(list)?;
1958
1959
0
                if *negated {
1960
0
                    write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name)
1961
                } else {
1962
0
                    write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name)
1963
                }
1964
            }
1965
0
            Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"),
1966
0
            Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"),
1967
0
            Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
1968
0
                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
1969
            }
1970
0
            Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
1971
0
                write!(f, "GROUPING SETS (")?;
1972
0
                for exprs in lists_of_exprs.iter() {
1973
0
                    write!(f, "({})", schema_name_from_exprs(exprs)?)?;
1974
                }
1975
0
                write!(f, ")")
1976
            }
1977
0
            Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
1978
0
                write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?)
1979
            }
1980
0
            Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)),
1981
0
            Expr::IsNotNull(expr) => {
1982
0
                write!(f, "{} IS NOT NULL", SchemaDisplay(expr))
1983
            }
1984
0
            Expr::IsUnknown(expr) => {
1985
0
                write!(f, "{} IS UNKNOWN", SchemaDisplay(expr))
1986
            }
1987
0
            Expr::IsNotUnknown(expr) => {
1988
0
                write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr))
1989
            }
1990
            Expr::InSubquery(InSubquery { negated: true, .. }) => {
1991
0
                write!(f, "NOT IN")
1992
            }
1993
0
            Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"),
1994
0
            Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)),
1995
0
            Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)),
1996
0
            Expr::IsNotTrue(expr) => {
1997
0
                write!(f, "{} IS NOT TRUE", SchemaDisplay(expr))
1998
            }
1999
0
            Expr::IsNotFalse(expr) => {
2000
0
                write!(f, "{} IS NOT FALSE", SchemaDisplay(expr))
2001
            }
2002
            Expr::Like(Like {
2003
0
                negated,
2004
0
                expr,
2005
0
                pattern,
2006
0
                escape_char,
2007
0
                case_insensitive,
2008
0
            }) => {
2009
0
                write!(
2010
0
                    f,
2011
0
                    "{} {}{} {}",
2012
0
                    SchemaDisplay(expr),
2013
0
                    if *negated { "NOT " } else { "" },
2014
0
                    if *case_insensitive { "ILIKE" } else { "LIKE" },
2015
0
                    SchemaDisplay(pattern),
2016
0
                )?;
2017
2018
0
                if let Some(char) = escape_char {
2019
0
                    write!(f, " CHAR '{char}'")?;
2020
0
                }
2021
2022
0
                Ok(())
2023
            }
2024
0
            Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)),
2025
0
            Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)),
2026
0
            Expr::Unnest(Unnest { expr }) => {
2027
0
                write!(f, "UNNEST({})", SchemaDisplay(expr))
2028
            }
2029
0
            Expr::ScalarFunction(ScalarFunction { func, args }) => {
2030
0
                match func.schema_name(args) {
2031
0
                    Ok(name) => {
2032
0
                        write!(f, "{name}")
2033
                    }
2034
0
                    Err(e) => {
2035
0
                        write!(f, "got error from schema_name {}", e)
2036
                    }
2037
                }
2038
            }
2039
0
            Expr::ScalarSubquery(Subquery { subquery, .. }) => {
2040
0
                write!(f, "{}", subquery.schema().field(0).name())
2041
            }
2042
            Expr::SimilarTo(Like {
2043
0
                negated,
2044
0
                expr,
2045
0
                pattern,
2046
0
                escape_char,
2047
0
                ..
2048
0
            }) => {
2049
0
                write!(
2050
0
                    f,
2051
0
                    "{} {} {}",
2052
0
                    SchemaDisplay(expr),
2053
0
                    if *negated {
2054
0
                        "NOT SIMILAR TO"
2055
                    } else {
2056
0
                        "SIMILAR TO"
2057
                    },
2058
0
                    SchemaDisplay(pattern),
2059
0
                )?;
2060
0
                if let Some(char) = escape_char {
2061
0
                    write!(f, " CHAR '{char}'")?;
2062
0
                }
2063
2064
0
                Ok(())
2065
            }
2066
            Expr::WindowFunction(WindowFunction {
2067
0
                fun,
2068
0
                args,
2069
0
                partition_by,
2070
0
                order_by,
2071
0
                window_frame,
2072
0
                null_treatment,
2073
0
            }) => {
2074
0
                write!(
2075
0
                    f,
2076
0
                    "{}({})",
2077
0
                    fun,
2078
0
                    schema_name_from_exprs_comma_seperated_without_space(args)?
2079
0
                )?;
2080
2081
0
                if let Some(null_treatment) = null_treatment {
2082
0
                    write!(f, " {}", null_treatment)?;
2083
0
                }
2084
2085
0
                if !partition_by.is_empty() {
2086
0
                    write!(
2087
0
                        f,
2088
0
                        " PARTITION BY [{}]",
2089
0
                        schema_name_from_exprs(partition_by)?
2090
0
                    )?;
2091
0
                }
2092
2093
0
                if !order_by.is_empty() {
2094
0
                    write!(f, " ORDER BY [{}]", schema_name_from_sorts(order_by)?)?;
2095
0
                };
2096
2097
0
                write!(f, " {window_frame}")
2098
            }
2099
        }
2100
0
    }
2101
}
2102
2103
/// Get schema_name for Vector of expressions
2104
///
2105
/// Internal usage. Please call `schema_name_from_exprs` instead
2106
// TODO: Use ", " to standardize the formatting of Vec<Expr>,
2107
// <https://github.com/apache/datafusion/issues/10364>
2108
0
pub(crate) fn schema_name_from_exprs_comma_seperated_without_space(
2109
0
    exprs: &[Expr],
2110
0
) -> Result<String, fmt::Error> {
2111
0
    schema_name_from_exprs_inner(exprs, ",")
2112
0
}
2113
2114
/// Get schema_name for Vector of expressions
2115
0
pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> {
2116
0
    schema_name_from_exprs_inner(exprs, ", ")
2117
0
}
2118
2119
0
fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result<String, fmt::Error> {
2120
0
    let mut s = String::new();
2121
0
    for (i, e) in exprs.iter().enumerate() {
2122
0
        if i > 0 {
2123
0
            write!(&mut s, "{sep}")?;
2124
0
        }
2125
0
        write!(&mut s, "{}", SchemaDisplay(e))?;
2126
    }
2127
2128
0
    Ok(s)
2129
0
}
2130
2131
0
pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result<String, fmt::Error> {
2132
0
    let mut s = String::new();
2133
0
    for (i, e) in sorts.iter().enumerate() {
2134
0
        if i > 0 {
2135
0
            write!(&mut s, ", ")?;
2136
0
        }
2137
0
        let ordering = if e.asc { "ASC" } else { "DESC" };
2138
0
        let nulls_ordering = if e.nulls_first {
2139
0
            "NULLS FIRST"
2140
        } else {
2141
0
            "NULLS LAST"
2142
        };
2143
0
        write!(&mut s, "{} {} {}", e.expr, ordering, nulls_ordering)?;
2144
    }
2145
2146
0
    Ok(s)
2147
0
}
2148
2149
/// Format expressions for display as part of a logical plan. In many cases, this will produce
2150
/// similar output to `Expr.name()` except that column names will be prefixed with '#'.
2151
impl fmt::Display for Expr {
2152
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2153
0
        match self {
2154
0
            Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"),
2155
0
            Expr::Column(c) => write!(f, "{c}"),
2156
0
            Expr::OuterReferenceColumn(_, c) => write!(f, "outer_ref({c})"),
2157
0
            Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")),
2158
0
            Expr::Literal(v) => write!(f, "{v:?}"),
2159
0
            Expr::Case(case) => {
2160
0
                write!(f, "CASE ")?;
2161
0
                if let Some(e) = &case.expr {
2162
0
                    write!(f, "{e} ")?;
2163
0
                }
2164
0
                for (w, t) in &case.when_then_expr {
2165
0
                    write!(f, "WHEN {w} THEN {t} ")?;
2166
                }
2167
0
                if let Some(e) = &case.else_expr {
2168
0
                    write!(f, "ELSE {e} ")?;
2169
0
                }
2170
0
                write!(f, "END")
2171
            }
2172
0
            Expr::Cast(Cast { expr, data_type }) => {
2173
0
                write!(f, "CAST({expr} AS {data_type:?})")
2174
            }
2175
0
            Expr::TryCast(TryCast { expr, data_type }) => {
2176
0
                write!(f, "TRY_CAST({expr} AS {data_type:?})")
2177
            }
2178
0
            Expr::Not(expr) => write!(f, "NOT {expr}"),
2179
0
            Expr::Negative(expr) => write!(f, "(- {expr})"),
2180
0
            Expr::IsNull(expr) => write!(f, "{expr} IS NULL"),
2181
0
            Expr::IsNotNull(expr) => write!(f, "{expr} IS NOT NULL"),
2182
0
            Expr::IsTrue(expr) => write!(f, "{expr} IS TRUE"),
2183
0
            Expr::IsFalse(expr) => write!(f, "{expr} IS FALSE"),
2184
0
            Expr::IsUnknown(expr) => write!(f, "{expr} IS UNKNOWN"),
2185
0
            Expr::IsNotTrue(expr) => write!(f, "{expr} IS NOT TRUE"),
2186
0
            Expr::IsNotFalse(expr) => write!(f, "{expr} IS NOT FALSE"),
2187
0
            Expr::IsNotUnknown(expr) => write!(f, "{expr} IS NOT UNKNOWN"),
2188
            Expr::Exists(Exists {
2189
0
                subquery,
2190
0
                negated: true,
2191
0
            }) => write!(f, "NOT EXISTS ({subquery:?})"),
2192
            Expr::Exists(Exists {
2193
0
                subquery,
2194
0
                negated: false,
2195
0
            }) => write!(f, "EXISTS ({subquery:?})"),
2196
            Expr::InSubquery(InSubquery {
2197
0
                expr,
2198
0
                subquery,
2199
0
                negated: true,
2200
0
            }) => write!(f, "{expr} NOT IN ({subquery:?})"),
2201
            Expr::InSubquery(InSubquery {
2202
0
                expr,
2203
0
                subquery,
2204
0
                negated: false,
2205
0
            }) => write!(f, "{expr} IN ({subquery:?})"),
2206
0
            Expr::ScalarSubquery(subquery) => write!(f, "({subquery:?})"),
2207
0
            Expr::BinaryExpr(expr) => write!(f, "{expr}"),
2208
0
            Expr::ScalarFunction(fun) => {
2209
0
                fmt_function(f, fun.name(), false, &fun.args, true)
2210
            }
2211
            // TODO: use udf's display_name, need to fix the seperator issue, <https://github.com/apache/datafusion/issues/10364>
2212
            // Expr::ScalarFunction(ScalarFunction { func, args }) => {
2213
            //     write!(f, "{}", func.display_name(args).unwrap())
2214
            // }
2215
            Expr::WindowFunction(WindowFunction {
2216
0
                fun,
2217
0
                args,
2218
0
                partition_by,
2219
0
                order_by,
2220
0
                window_frame,
2221
0
                null_treatment,
2222
0
            }) => {
2223
0
                fmt_function(f, &fun.to_string(), false, args, true)?;
2224
2225
0
                if let Some(nt) = null_treatment {
2226
0
                    write!(f, "{}", nt)?;
2227
0
                }
2228
2229
0
                if !partition_by.is_empty() {
2230
0
                    write!(f, " PARTITION BY [{}]", expr_vec_fmt!(partition_by))?;
2231
0
                }
2232
0
                if !order_by.is_empty() {
2233
0
                    write!(f, " ORDER BY [{}]", expr_vec_fmt!(order_by))?;
2234
0
                }
2235
0
                write!(
2236
0
                    f,
2237
0
                    " {} BETWEEN {} AND {}",
2238
0
                    window_frame.units, window_frame.start_bound, window_frame.end_bound
2239
0
                )?;
2240
0
                Ok(())
2241
            }
2242
            Expr::AggregateFunction(AggregateFunction {
2243
0
                func,
2244
0
                distinct,
2245
0
                ref args,
2246
0
                filter,
2247
0
                order_by,
2248
0
                null_treatment,
2249
0
                ..
2250
0
            }) => {
2251
0
                fmt_function(f, func.name(), *distinct, args, true)?;
2252
0
                if let Some(nt) = null_treatment {
2253
0
                    write!(f, " {}", nt)?;
2254
0
                }
2255
0
                if let Some(fe) = filter {
2256
0
                    write!(f, " FILTER (WHERE {fe})")?;
2257
0
                }
2258
0
                if let Some(ob) = order_by {
2259
0
                    write!(f, " ORDER BY [{}]", expr_vec_fmt!(ob))?;
2260
0
                }
2261
0
                Ok(())
2262
            }
2263
            Expr::Between(Between {
2264
0
                expr,
2265
0
                negated,
2266
0
                low,
2267
0
                high,
2268
0
            }) => {
2269
0
                if *negated {
2270
0
                    write!(f, "{expr} NOT BETWEEN {low} AND {high}")
2271
                } else {
2272
0
                    write!(f, "{expr} BETWEEN {low} AND {high}")
2273
                }
2274
            }
2275
            Expr::Like(Like {
2276
0
                negated,
2277
0
                expr,
2278
0
                pattern,
2279
0
                escape_char,
2280
0
                case_insensitive,
2281
0
            }) => {
2282
0
                write!(f, "{expr}")?;
2283
0
                let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" };
2284
0
                if *negated {
2285
0
                    write!(f, " NOT")?;
2286
0
                }
2287
0
                if let Some(char) = escape_char {
2288
0
                    write!(f, " {op_name} {pattern} ESCAPE '{char}'")
2289
                } else {
2290
0
                    write!(f, " {op_name} {pattern}")
2291
                }
2292
            }
2293
            Expr::SimilarTo(Like {
2294
0
                negated,
2295
0
                expr,
2296
0
                pattern,
2297
0
                escape_char,
2298
0
                case_insensitive: _,
2299
0
            }) => {
2300
0
                write!(f, "{expr}")?;
2301
0
                if *negated {
2302
0
                    write!(f, " NOT")?;
2303
0
                }
2304
0
                if let Some(char) = escape_char {
2305
0
                    write!(f, " SIMILAR TO {pattern} ESCAPE '{char}'")
2306
                } else {
2307
0
                    write!(f, " SIMILAR TO {pattern}")
2308
                }
2309
            }
2310
            Expr::InList(InList {
2311
0
                expr,
2312
0
                list,
2313
0
                negated,
2314
0
            }) => {
2315
0
                if *negated {
2316
0
                    write!(f, "{expr} NOT IN ([{}])", expr_vec_fmt!(list))
2317
                } else {
2318
0
                    write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list))
2319
                }
2320
            }
2321
0
            Expr::Wildcard { qualifier, options } => match qualifier {
2322
0
                Some(qualifier) => write!(f, "{qualifier}.*{options}"),
2323
0
                None => write!(f, "*{options}"),
2324
            },
2325
0
            Expr::GroupingSet(grouping_sets) => match grouping_sets {
2326
0
                GroupingSet::Rollup(exprs) => {
2327
0
                    // ROLLUP (c0, c1, c2)
2328
0
                    write!(f, "ROLLUP ({})", expr_vec_fmt!(exprs))
2329
                }
2330
0
                GroupingSet::Cube(exprs) => {
2331
0
                    // CUBE (c0, c1, c2)
2332
0
                    write!(f, "CUBE ({})", expr_vec_fmt!(exprs))
2333
                }
2334
0
                GroupingSet::GroupingSets(lists_of_exprs) => {
2335
0
                    // GROUPING SETS ((c0), (c1, c2), (c3, c4))
2336
0
                    write!(
2337
0
                        f,
2338
0
                        "GROUPING SETS ({})",
2339
0
                        lists_of_exprs
2340
0
                            .iter()
2341
0
                            .map(|exprs| format!("({})", expr_vec_fmt!(exprs)))
2342
0
                            .collect::<Vec<String>>()
2343
0
                            .join(", ")
2344
0
                    )
2345
                }
2346
            },
2347
0
            Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"),
2348
0
            Expr::Unnest(Unnest { expr }) => {
2349
0
                write!(f, "UNNEST({expr})")
2350
            }
2351
        }
2352
0
    }
2353
}
2354
2355
0
fn fmt_function(
2356
0
    f: &mut fmt::Formatter,
2357
0
    fun: &str,
2358
0
    distinct: bool,
2359
0
    args: &[Expr],
2360
0
    display: bool,
2361
0
) -> fmt::Result {
2362
0
    let args: Vec<String> = match display {
2363
0
        true => args.iter().map(|arg| format!("{arg}")).collect(),
2364
0
        false => args.iter().map(|arg| format!("{arg:?}")).collect(),
2365
    };
2366
2367
0
    let distinct_str = match distinct {
2368
0
        true => "DISTINCT ",
2369
0
        false => "",
2370
    };
2371
0
    write!(f, "{}({}{})", fun, distinct_str, args.join(", "))
2372
0
}
2373
2374
/// The name of the column (field) that this `Expr` will produce in the physical plan.
2375
/// The difference from [Expr::schema_name] is that top-level columns are unqualified.
2376
0
pub fn physical_name(expr: &Expr) -> Result<String> {
2377
0
    if let Expr::Column(col) = expr {
2378
0
        Ok(col.name.clone())
2379
    } else {
2380
0
        Ok(expr.schema_name().to_string())
2381
    }
2382
0
}
2383
2384
#[cfg(test)]
2385
mod test {
2386
    use crate::expr_fn::col;
2387
    use crate::{
2388
        case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue,
2389
        ScalarUDF, ScalarUDFImpl, Volatility,
2390
    };
2391
    use sqlparser::ast;
2392
    use sqlparser::ast::{Ident, IdentWithAlias};
2393
    use std::any::Any;
2394
2395
    /// A `CASE <expr> WHEN .. THEN .. ELSE .. END` expression must render the
    /// same text through `canonical_name` and through `Display`.
    #[test]
    #[allow(deprecated)]
    fn format_case_when() -> Result<()> {
        let case_expr = case(col("a"))
            .when(lit(1), lit(true))
            .when(lit(0), lit(false))
            .otherwise(lit(ScalarValue::Null))?;

        let expected = "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END";

        let canonical = case_expr.canonical_name();
        assert_eq!(expected, canonical);

        let displayed = case_expr.to_string();
        assert_eq!(expected, displayed);

        Ok(())
    }
2407
2408
    /// A `CAST` expression renders as `CAST(<input> AS <type>)` through both
    /// `canonical_name` and `Display`, while its schema name stays that of the
    /// input expression.
    #[test]
    #[allow(deprecated)]
    fn format_cast() -> Result<()> {
        let literal = Expr::Literal(ScalarValue::Float32(Some(1.23)));
        let cast_expr = Expr::Cast(Cast {
            expr: Box::new(literal),
            data_type: DataType::Utf8,
        });

        let expected_canonical = "CAST(Float32(1.23) AS Utf8)";
        assert_eq!(expected_canonical, cast_expr.canonical_name());
        assert_eq!(expected_canonical, cast_expr.to_string());

        // CAST intentionally has a name that differs from its `Display`
        // representation: casting does not change the name of an expression.
        let schema_name = cast_expr.schema_name().to_string();
        assert_eq!("Float32(1.23)", schema_name);

        Ok(())
    }
2423
2424
    /// Smoke test that `Expr` supports ordering comparisons; it is not meant
    /// to cover every combination of expressions.
    #[test]
    fn test_partial_ord() {
        let a_plus_one = col("a") + lit(1);
        let a_plus_two = col("a") + lit(2);
        let not_a_plus_two = !(col("a") + lit(2));

        assert!(a_plus_one < a_plus_two);
        assert!(not_a_plus_two > a_plus_two);
        assert!(a_plus_one < not_a_plus_two);
    }
2436
2437
    /// `column_refs` must report every distinct column referenced by an
    /// expression, and nothing else.
    #[test]
    fn test_collect_expr() -> Result<()> {
        // A cast wrapping one column references exactly that column.
        let cast_expr = Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64));
        let cast_columns = cast_expr.column_refs();
        assert_eq!(1, cast_columns.len());
        assert!(cast_columns.contains(&Column::from_name("a")));

        // A sum over two columns and a literal references both columns.
        let sum_expr = col("a") + col("b") + lit(1);
        let sum_columns = sum_expr.column_refs();
        assert_eq!(2, sum_columns.len());
        for name in ["a", "b"] {
            assert!(sum_columns.contains(&Column::from_name(name)));
        }

        Ok(())
    }
2458
2459
    /// Comparison and boolean builder methods on `Expr` must display with the
    /// expected infix operator between the two operands.
    #[test]
    fn test_logical_ops() {
        // Fresh operands for every case, since the builder methods consume them.
        let one = || lit(1u32);
        let two = || lit(2u32);

        let cases = [
            (one().eq(two()), "UInt32(1) = UInt32(2)"),
            (one().not_eq(two()), "UInt32(1) != UInt32(2)"),
            (one().gt(two()), "UInt32(1) > UInt32(2)"),
            (one().gt_eq(two()), "UInt32(1) >= UInt32(2)"),
            (one().lt(two()), "UInt32(1) < UInt32(2)"),
            (one().lt_eq(two()), "UInt32(1) <= UInt32(2)"),
            (one().and(two()), "UInt32(1) AND UInt32(2)"),
            (one().or(two()), "UInt32(1) OR UInt32(2)"),
        ];

        for (expr, expected) in cases {
            assert_eq!(format!("{expr}"), expected);
        }
    }
2494
2495
    /// Wraps a minimal `ScalarUDFImpl` in a `ScalarUDF` and checks that the
    /// volatility passed to `Signature::uniform` is the one reported back by
    /// `signature()`.
    #[test]
    fn test_is_volatile_scalar_func() {
        // Minimal UDF implementation; only the signature differs per instance.
        #[derive(Debug)]
        struct TestScalarUDF {
            signature: Signature,
        }

        impl ScalarUDFImpl for TestScalarUDF {
            fn as_any(&self) -> &dyn Any {
                self
            }

            fn name(&self) -> &str {
                "TestScalarUDF"
            }

            fn signature(&self) -> &Signature {
                &self.signature
            }

            fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
                Ok(DataType::Utf8)
            }

            fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
                Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
            }
        }

        // Builds a shared UDF whose signature carries the requested volatility.
        let udf_with = |volatility: Volatility| {
            let signature = Signature::uniform(1, vec![DataType::Float32], volatility);
            Arc::new(ScalarUDF::from(TestScalarUDF { signature }))
        };

        let stable_udf = udf_with(Volatility::Stable);
        assert_ne!(stable_udf.signature().volatility, Volatility::Volatile);

        let volatile_udf = udf_with(Volatility::Volatile);
        assert_eq!(volatile_udf.signature().volatility, Volatility::Volatile);
    }
2536
2537
    use super::*;
2538
2539
    /// `first_value` must report the type of its single input as its return type.
    #[test]
    fn test_first_value_return_type() -> Result<()> {
        let first_value = find_df_window_func("first_value").unwrap();

        for input_type in [DataType::Utf8, DataType::UInt64] {
            let observed = first_value.return_type(&[input_type.clone()], &[true], "")?;
            assert_eq!(input_type, observed);
        }

        Ok(())
    }
2550
2551
    /// `last_value` must report the type of its single input as its return type.
    #[test]
    fn test_last_value_return_type() -> Result<()> {
        let last_value = find_df_window_func("last_value").unwrap();

        for input_type in [DataType::Utf8, DataType::Float64] {
            let observed = last_value.return_type(&[input_type.clone()], &[true], "")?;
            assert_eq!(input_type, observed);
        }

        Ok(())
    }
2562
2563
    /// `lead` must report the type of its single input as its return type.
    #[test]
    fn test_lead_return_type() -> Result<()> {
        let lead = find_df_window_func("lead").unwrap();

        for input_type in [DataType::Utf8, DataType::Float64] {
            let observed = lead.return_type(&[input_type.clone()], &[true], "")?;
            assert_eq!(input_type, observed);
        }

        Ok(())
    }
2574
2575
    /// `lag` must report the type of its single input as its return type.
    #[test]
    fn test_lag_return_type() -> Result<()> {
        let lag = find_df_window_func("lag").unwrap();

        for input_type in [DataType::Utf8, DataType::Float64] {
            let observed = lag.return_type(&[input_type.clone()], &[true], "")?;
            assert_eq!(input_type, observed);
        }

        Ok(())
    }
2586
2587
    /// `nth_value` must report the type of its first input as its return
    /// type; the second input is a `UInt64` in every case checked here.
    #[test]
    fn test_nth_value_return_type() -> Result<()> {
        let nth_value = find_df_window_func("nth_value").unwrap();

        for value_type in [DataType::Utf8, DataType::Float64] {
            let input_types = [value_type.clone(), DataType::UInt64];
            let observed = nth_value.return_type(&input_types, &[true, true], "")?;
            assert_eq!(value_type, observed);
        }

        Ok(())
    }
2600
2601
    /// `percent_rank` takes no inputs and must return `Float64`.
    #[test]
    fn test_percent_rank_return_type() -> Result<()> {
        let percent_rank = find_df_window_func("percent_rank").unwrap();

        let return_type = percent_rank.return_type(&[], &[], "")?;
        assert_eq!(DataType::Float64, return_type);

        Ok(())
    }
2609
2610
    /// `cume_dist` takes no inputs and must return `Float64`.
    #[test]
    fn test_cume_dist_return_type() -> Result<()> {
        let cume_dist = find_df_window_func("cume_dist").unwrap();

        let return_type = cume_dist.return_type(&[], &[], "")?;
        assert_eq!(DataType::Float64, return_type);

        Ok(())
    }
2618
2619
    /// `ntile` given an `Int16` input must return `UInt64`.
    #[test]
    fn test_ntile_return_type() -> Result<()> {
        let ntile = find_df_window_func("ntile").unwrap();

        let return_type = ntile.return_type(&[DataType::Int16], &[true], "")?;
        assert_eq!(DataType::UInt64, return_type);

        Ok(())
    }
2627
2628
    /// Looking a window function up by its lower-case and by its upper-case
    /// name must resolve to the same function, and the function's display
    /// name must follow the casing asserted below.
    #[test]
    fn test_window_function_case_insensitive() -> Result<()> {
        const NAMES: [&str; 10] = [
            "rank",
            "dense_rank",
            "percent_rank",
            "cume_dist",
            "ntile",
            "lag",
            "lead",
            "first_value",
            "last_value",
            "nth_value",
        ];

        for name in NAMES {
            let upper_name = name.to_uppercase();
            let from_lower = find_df_window_func(name).unwrap();
            let from_upper = find_df_window_func(&upper_name).unwrap();
            assert_eq!(from_lower, from_upper);

            // `first_value` and `last_value` display in lower case; every
            // other function checked here displays in upper case.
            let displayed = from_lower.to_string();
            let expected = match displayed.as_str() {
                "first_value" | "last_value" => name.to_string(),
                _ => upper_name,
            };
            assert_eq!(displayed, expected);
        }

        Ok(())
    }
2654
2655
    /// `find_df_window_func` must resolve known names (in any casing) to the
    /// matching built-in window function and return `None` for unknown names.
    #[test]
    fn test_find_df_window_function() {
        // Wraps a built-in function the way a successful lookup reports it.
        let built_in = |fun| Some(WindowFunctionDefinition::BuiltInWindowFunction(fun));

        let cases = [
            (
                "cume_dist",
                built_in(built_in_window_function::BuiltInWindowFunction::CumeDist),
            ),
            (
                "first_value",
                built_in(built_in_window_function::BuiltInWindowFunction::FirstValue),
            ),
            (
                "LAST_value",
                built_in(built_in_window_function::BuiltInWindowFunction::LastValue),
            ),
            (
                "LAG",
                built_in(built_in_window_function::BuiltInWindowFunction::Lag),
            ),
            (
                "LEAD",
                built_in(built_in_window_function::BuiltInWindowFunction::Lead),
            ),
            ("not_exist", None),
        ];

        for (name, expected) in cases {
            assert_eq!(find_df_window_func(name), expected);
        }
    }
2689
2690
    /// Wildcard expressions must display as `*` (optionally qualified),
    /// followed by whichever single wildcard option is set.
    #[test]
    fn test_display_wildcard() {
        assert_eq!(wildcard().to_string(), "*");
        assert_eq!(qualified_wildcard("t1").to_string(), "t1.*");

        // Renders a wildcard carrying the given options.
        let render =
            |options: WildcardOptions| wildcard_with_options(options).to_string();

        // ILIKE
        let ilike = IlikeSelectItem {
            pattern: "c1".to_string(),
        };
        assert_eq!(
            render(wildcard_options(Some(ilike), None, None, None, None)),
            "* ILIKE 'c1'"
        );

        // EXCLUDE
        let exclude =
            ExcludeSelectItem::Multiple(vec![Ident::from("c1"), Ident::from("c2")]);
        assert_eq!(
            render(wildcard_options(None, Some(exclude), None, None, None)),
            "* EXCLUDE (c1, c2)"
        );

        // EXCEPT
        let except = ExceptSelectItem {
            first_element: Ident::from("c1"),
            additional_elements: vec![Ident::from("c2")],
        };
        assert_eq!(
            render(wildcard_options(None, None, Some(except), None, None)),
            "* EXCEPT (c1, c2)"
        );

        // REPLACE
        let replace = PlannedReplaceSelectItem {
            items: vec![ReplaceSelectElement {
                expr: ast::Expr::Identifier(Ident::from("c1")),
                column_name: Ident::from("a1"),
                as_keyword: false,
            }],
            planned_expressions: vec![],
        };
        assert_eq!(
            render(wildcard_options(None, None, None, Some(replace), None)),
            "* REPLACE (c1 a1)"
        );

        // RENAME
        let rename = RenameSelectItem::Multiple(vec![IdentWithAlias {
            ident: Ident::from("c1"),
            alias: Ident::from("a1"),
        }]);
        assert_eq!(
            render(wildcard_options(None, None, None, None, Some(rename))),
            "* RENAME (c1 AS a1)"
        );
    }
2778
2779
    fn wildcard_options(
2780
        opt_ilike: Option<IlikeSelectItem>,
2781
        opt_exclude: Option<ExcludeSelectItem>,
2782
        opt_except: Option<ExceptSelectItem>,
2783
        opt_replace: Option<PlannedReplaceSelectItem>,
2784
        opt_rename: Option<RenameSelectItem>,
2785
    ) -> WildcardOptions {
2786
        WildcardOptions {
2787
            ilike: opt_ilike,
2788
            exclude: opt_exclude,
2789
            except: opt_except,
2790
            replace: opt_replace,
2791
            rename: opt_rename,
2792
        }
2793
    }
2794
}