Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/expr/src/logical_plan/plan.rs
Line | Count | Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Logical plan types
19
20
use std::cmp::Ordering;
21
use std::collections::{HashMap, HashSet};
22
use std::fmt::{self, Debug, Display, Formatter};
23
use std::hash::{Hash, Hasher};
24
use std::sync::Arc;
25
26
use super::dml::CopyTo;
27
use super::DdlStatement;
28
use crate::builder::{change_redundant_column, unnest_with_options};
29
use crate::expr::{Placeholder, Sort as SortExpr, WindowFunction};
30
use crate::expr_rewriter::{
31
    create_col_from_scalar_expr, normalize_cols, normalize_sorts, NamePreserver,
32
};
33
use crate::logical_plan::display::{GraphvizVisitor, IndentVisitor};
34
use crate::logical_plan::extension::UserDefinedLogicalNode;
35
use crate::logical_plan::{DmlStatement, Statement};
36
use crate::utils::{
37
    enumerate_grouping_sets, exprlist_len, exprlist_to_fields, find_base_plan,
38
    find_out_reference_exprs, grouping_set_expr_count, grouping_set_to_exprlist,
39
    split_conjunction,
40
};
41
use crate::{
42
    build_join_schema, expr_vec_fmt, BinaryExpr, CreateMemoryTable, CreateView, Expr,
43
    ExprSchemable, LogicalPlanBuilder, Operator, TableProviderFilterPushDown,
44
    TableSource, WindowFunctionDefinition,
45
};
46
47
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
48
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
49
use datafusion_common::{
50
    aggregate_functional_dependencies, internal_err, plan_err, Column, Constraints,
51
    DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependence,
52
    FunctionalDependencies, ParamValues, Result, TableReference, UnnestOptions,
53
};
54
use indexmap::IndexSet;
55
56
// backwards compatibility
57
use crate::display::PgJsonVisitor;
58
use crate::tree_node::replace_sort_expressions;
59
pub use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan};
60
pub use datafusion_common::{JoinConstraint, JoinType};
61
62
/// A `LogicalPlan` is a node in a tree of relational operators (such as
63
/// Projection or Filter).
64
///
65
/// Represents transforming an input relation (table) to an output relation
66
/// (table) with a potentially different schema. Plans form a dataflow tree
67
/// where data flows from leaves up to the root to produce the query result.
68
///
69
/// `LogicalPlan`s can be created by the SQL query planner, the DataFrame API,
70
/// or programmatically (for example custom query languages).
71
///
72
/// # See also:
73
/// * [`Expr`]: For the expressions that are evaluated by the plan
74
/// * [`LogicalPlanBuilder`]: For building `LogicalPlan`s
75
/// * [`tree_node`]: To inspect and rewrite `LogicalPlan`s
76
///
77
/// [`tree_node`]: crate::logical_plan::tree_node
78
///
79
/// # Examples
80
///
81
/// ## Creating a LogicalPlan from SQL:
82
///
83
/// See [`SessionContext::sql`](https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.sql)
84
///
85
/// ## Creating a LogicalPlan from the DataFrame API:
86
///
87
/// See [`DataFrame::logical_plan`](https://docs.rs/datafusion/latest/datafusion/dataframe/struct.DataFrame.html#method.logical_plan)
88
///
89
/// ## Creating a LogicalPlan programmatically:
90
///
91
/// See [`LogicalPlanBuilder`]
92
///
93
/// # Visiting and Rewriting `LogicalPlan`s
94
///
95
/// Using the [`tree_node`] API, you can recursively walk all nodes in a
96
/// `LogicalPlan`. For example, to find all column references in a plan:
97
///
98
/// ```
99
/// # use std::collections::HashSet;
100
/// # use arrow::datatypes::{DataType, Field, Schema};
101
/// # use datafusion_expr::{Expr, col, lit, LogicalPlan, LogicalPlanBuilder, table_scan};
102
/// # use datafusion_common::tree_node::{TreeNodeRecursion, TreeNode};
103
/// # use datafusion_common::{Column, Result};
104
/// # fn employee_schema() -> Schema {
105
/// #    Schema::new(vec![
106
/// #           Field::new("name", DataType::Utf8, false),
107
/// #           Field::new("salary", DataType::Int32, false),
108
/// #       ])
109
/// #   }
110
/// // Projection(name, salary)
111
/// //   Filter(salary > 1000)
112
/// //     TableScan(employee)
113
/// # fn main() -> Result<()> {
114
/// let plan = table_scan(Some("employee"), &employee_schema(), None)?
115
///  .filter(col("salary").gt(lit(1000)))?
116
///  .project(vec![col("name")])?
117
///  .build()?;
118
///
119
/// // use apply to walk the plan and collect all expressions
120
/// let mut expressions = HashSet::new();
121
/// plan.apply(|node| {
122
///   // collect all expressions in the plan
123
///   node.apply_expressions(|expr| {
124
///    expressions.insert(expr.clone());
125
///    Ok(TreeNodeRecursion::Continue) // control walk of expressions
126
///   })?;
127
///   Ok(TreeNodeRecursion::Continue) // control walk of plan nodes
128
/// }).unwrap();
129
///
130
/// // we found the expressions in the projection and filter
131
/// assert_eq!(expressions.len(), 2);
132
/// println!("Found expressions: {:?}", expressions);
133
/// // found predicate in the Filter: employee.salary > 1000
134
/// let salary = Expr::Column(Column::new(Some("employee"), "salary"));
135
/// assert!(expressions.contains(&salary.gt(lit(1000))));
136
/// // found projection in the Projection: employee.name
137
/// let name = Expr::Column(Column::new(Some("employee"), "name"));
138
/// assert!(expressions.contains(&name));
139
/// # Ok(())
140
/// # }
141
/// ```
142
///
143
/// You can also rewrite plans using the [`tree_node`] API. For example, to
144
/// replace the filter predicate in a plan:
145
///
146
/// ```
147
/// # use std::collections::HashSet;
148
/// # use arrow::datatypes::{DataType, Field, Schema};
149
/// # use datafusion_expr::{Expr, col, lit, LogicalPlan, LogicalPlanBuilder, table_scan};
150
/// # use datafusion_common::tree_node::{TreeNodeRecursion, TreeNode};
151
/// # use datafusion_common::{Column, Result};
152
/// # fn employee_schema() -> Schema {
153
/// #    Schema::new(vec![
154
/// #           Field::new("name", DataType::Utf8, false),
155
/// #           Field::new("salary", DataType::Int32, false),
156
/// #       ])
157
/// #   }
158
/// // Projection(name, salary)
159
/// //   Filter(salary > 1000)
160
/// //     TableScan(employee)
161
/// # fn main() -> Result<()> {
162
/// use datafusion_common::tree_node::Transformed;
163
/// let plan = table_scan(Some("employee"), &employee_schema(), None)?
164
///  .filter(col("salary").gt(lit(1000)))?
165
///  .project(vec![col("name")])?
166
///  .build()?;
167
///
168
/// // use transform to rewrite the plan
169
/// let transformed_result = plan.transform(|node| {
170
///   // when we see the filter node
171
///   if let LogicalPlan::Filter(mut filter) = node {
172
///     // replace predicate with salary < 2000
173
///     filter.predicate = Expr::Column(Column::new(Some("employee"), "salary")).lt(lit(2000));
174
///     let new_plan = LogicalPlan::Filter(filter);
175
///     return Ok(Transformed::yes(new_plan)); // communicate the node was changed
176
///   }
177
///   // return the node unchanged
178
///   Ok(Transformed::no(node))
179
/// }).unwrap();
180
///
181
/// // Transformed result contains rewritten plan and information about
182
/// // whether the plan was changed
183
/// assert!(transformed_result.transformed);
184
/// let rewritten_plan = transformed_result.data;
185
///
186
/// // we found the filter
187
/// assert_eq!(rewritten_plan.display_indent().to_string(),
188
/// "Projection: employee.name\
189
/// \n  Filter: employee.salary < Int32(2000)\
190
/// \n    TableScan: employee");
191
/// # Ok(())
192
/// # }
193
/// ```
194
///
195
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
196
pub enum LogicalPlan {
197
    /// Evaluates an arbitrary list of expressions (essentially a
198
    /// SELECT with an expression list) on its input.
199
    Projection(Projection),
200
    /// Filters rows from its input that do not match an
201
    /// expression (essentially a WHERE clause with a predicate
202
    /// expression).
203
    ///
204
    /// Semantically, `<predicate>` is evaluated for each row of the
205
    /// input; If the value of `<predicate>` is true, the input row is
206
    /// passed to the output. If the value of `<predicate>` is false
207
    /// (or null), the row is discarded.
208
    Filter(Filter),
209
    /// Windows its input based on a set of window specs and window
210
    /// function (e.g. SUM or RANK).  This is used to implement SQL
211
    /// window functions, and the `OVER` clause.
212
    Window(Window),
213
    /// Aggregates its input based on a set of grouping and aggregate
214
    /// expressions (e.g. SUM). This is used to implement SQL aggregates
215
    /// and `GROUP BY`.
216
    Aggregate(Aggregate),
217
    /// Sorts its input according to a list of sort expressions. This
218
    /// is used to implement SQL `ORDER BY`
219
    Sort(Sort),
220
    /// Join two logical plans on one or more join columns.
221
    /// This is used to implement SQL `JOIN`
222
    Join(Join),
223
    /// Apply Cross Join to two logical plans.
224
    /// This is used to implement SQL `CROSS JOIN`
225
    CrossJoin(CrossJoin),
226
    /// Repartitions the input based on a partitioning scheme. This is
227
    /// used to add parallelism and is sometimes referred to as an
228
    /// "exchange" operator in other systems
229
    Repartition(Repartition),
230
    /// Union multiple inputs with the same schema into a single
231
    /// output stream. This is used to implement SQL `UNION [ALL]` and
232
    /// `INTERSECT [ALL]`.
233
    Union(Union),
234
    /// Produces rows from a [`TableSource`], used to implement SQL
235
    /// `FROM` tables or views.
236
    TableScan(TableScan),
237
    /// An empty relation with an empty schema that
238
    /// produces 0 or 1 row. This is used to implement SQL `SELECT`
239
    /// that has no values in the `FROM` clause.
240
    EmptyRelation(EmptyRelation),
241
    /// Produces the output of running another query.  This is used to
242
    /// implement SQL subqueries
243
    Subquery(Subquery),
244
    /// An aliased relation: provides, or changes, the name of a relation.
245
    SubqueryAlias(SubqueryAlias),
246
    /// Skip some number of rows, and then fetch some number of rows.
247
    Limit(Limit),
248
    /// A DataFusion [`Statement`] such as `SET VARIABLE` or `START TRANSACTION`
249
    Statement(Statement),
250
    /// Values expression. See
251
    /// [Postgres VALUES](https://www.postgresql.org/docs/current/queries-values.html)
252
    /// documentation for more details. This is used to implement SQL such as
253
    /// `VALUES (1, 2), (3, 4)`
254
    Values(Values),
255
    /// Produces a relation with string representations of
256
    /// various parts of the plan. This is used to implement SQL `EXPLAIN`.
257
    Explain(Explain),
258
    /// Runs the input, and prints annotated physical plan as a string
259
    /// with execution metric. This is used to implement SQL
260
    /// `EXPLAIN ANALYZE`.
261
    Analyze(Analyze),
262
    /// Extension operator defined outside of DataFusion. This is used
263
    /// to extend DataFusion with custom relational operations.
264
    Extension(Extension),
265
    /// Remove duplicate rows from the input. This is used to
266
    /// implement SQL `SELECT DISTINCT ...`.
267
    Distinct(Distinct),
268
    /// Prepare a statement and find any bind parameters
269
    /// (e.g. `?`). This is used to implement SQL prepared statements.
270
    Prepare(Prepare),
271
    /// Data Manipulation Language (DML): Insert / Update / Delete
272
    Dml(DmlStatement),
273
    /// Data Definition Language (DDL): CREATE / DROP TABLES / VIEWS / SCHEMAS
274
    Ddl(DdlStatement),
275
    /// `COPY TO` for writing plan results to files
276
    Copy(CopyTo),
277
    /// Describe the schema of the table. This is used to implement the
278
    /// SQL `DESCRIBE` command from MySQL.
279
    DescribeTable(DescribeTable),
280
    /// Unnest a column that contains a nested list type such as an
281
    /// ARRAY. This is used to implement SQL `UNNEST`
282
    Unnest(Unnest),
283
    /// A variadic query (e.g. "Recursive CTEs")
284
    RecursiveQuery(RecursiveQuery),
285
}
286
287
impl Default for LogicalPlan {
288
0
    fn default() -> Self {
289
0
        LogicalPlan::EmptyRelation(EmptyRelation {
290
0
            produce_one_row: false,
291
0
            schema: Arc::new(DFSchema::empty()),
292
0
        })
293
0
    }
294
}
295
296
impl LogicalPlan {
297
    /// Get a reference to the logical plan's schema
298
0
    pub fn schema(&self) -> &DFSchemaRef {
299
0
        match self {
300
0
            LogicalPlan::EmptyRelation(EmptyRelation { schema, .. }) => schema,
301
0
            LogicalPlan::Values(Values { schema, .. }) => schema,
302
            LogicalPlan::TableScan(TableScan {
303
0
                projected_schema, ..
304
0
            }) => projected_schema,
305
0
            LogicalPlan::Projection(Projection { schema, .. }) => schema,
306
0
            LogicalPlan::Filter(Filter { input, .. }) => input.schema(),
307
0
            LogicalPlan::Distinct(Distinct::All(input)) => input.schema(),
308
0
            LogicalPlan::Distinct(Distinct::On(DistinctOn { schema, .. })) => schema,
309
0
            LogicalPlan::Window(Window { schema, .. }) => schema,
310
0
            LogicalPlan::Aggregate(Aggregate { schema, .. }) => schema,
311
0
            LogicalPlan::Sort(Sort { input, .. }) => input.schema(),
312
0
            LogicalPlan::Join(Join { schema, .. }) => schema,
313
0
            LogicalPlan::CrossJoin(CrossJoin { schema, .. }) => schema,
314
0
            LogicalPlan::Repartition(Repartition { input, .. }) => input.schema(),
315
0
            LogicalPlan::Limit(Limit { input, .. }) => input.schema(),
316
0
            LogicalPlan::Statement(statement) => statement.schema(),
317
0
            LogicalPlan::Subquery(Subquery { subquery, .. }) => subquery.schema(),
318
0
            LogicalPlan::SubqueryAlias(SubqueryAlias { schema, .. }) => schema,
319
0
            LogicalPlan::Prepare(Prepare { input, .. }) => input.schema(),
320
0
            LogicalPlan::Explain(explain) => &explain.schema,
321
0
            LogicalPlan::Analyze(analyze) => &analyze.schema,
322
0
            LogicalPlan::Extension(extension) => extension.node.schema(),
323
0
            LogicalPlan::Union(Union { schema, .. }) => schema,
324
0
            LogicalPlan::DescribeTable(DescribeTable { output_schema, .. }) => {
325
0
                output_schema
326
            }
327
0
            LogicalPlan::Dml(DmlStatement { output_schema, .. }) => output_schema,
328
0
            LogicalPlan::Copy(CopyTo { input, .. }) => input.schema(),
329
0
            LogicalPlan::Ddl(ddl) => ddl.schema(),
330
0
            LogicalPlan::Unnest(Unnest { schema, .. }) => schema,
331
0
            LogicalPlan::RecursiveQuery(RecursiveQuery { static_term, .. }) => {
332
0
                // we take the schema of the static term as the schema of the entire recursive query
333
0
                static_term.schema()
334
            }
335
        }
336
0
    }
337
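A minimal usage sketch (editor's addition, not part of the instrumented source): calling `schema()` on the root of a small plan returns that node's output schema. The table name `t` and its columns are illustrative assumptions, mirroring the doc examples earlier in this file.

```rust
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_expr::{col, table_scan};

fn schema_of_projection() -> Result<()> {
    let schema = Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("name", DataType::Utf8, false),
    ]);
    // Projection over a scan of a hypothetical table `t`
    let plan = table_scan(Some("t"), &schema, None)?
        .project(vec![col("id")])?
        .build()?;
    // The root Projection reports only the projected column
    assert_eq!(plan.schema().fields().len(), 1);
    Ok(())
}
```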
338
    /// Returns the fallback schemas (used in addition to the main schema of
339
    /// the plan) when normalizing columns.
340
0
    pub fn fallback_normalize_schemas(&self) -> Vec<&DFSchema> {
341
0
        match self {
342
            LogicalPlan::Window(_)
343
            | LogicalPlan::Projection(_)
344
            | LogicalPlan::Aggregate(_)
345
            | LogicalPlan::Unnest(_)
346
            | LogicalPlan::Join(_)
347
0
            | LogicalPlan::CrossJoin(_) => self
348
0
                .inputs()
349
0
                .iter()
350
0
                .map(|input| input.schema().as_ref())
351
0
                .collect(),
352
0
            _ => vec![],
353
        }
354
0
    }
355
356
    /// Returns the (fixed) output schema for explain plans
357
0
    pub fn explain_schema() -> SchemaRef {
358
0
        SchemaRef::new(Schema::new(vec![
359
0
            Field::new("plan_type", DataType::Utf8, false),
360
0
            Field::new("plan", DataType::Utf8, false),
361
0
        ]))
362
0
    }
363
364
    /// Returns the (fixed) output schema for `DESCRIBE` plans
365
0
    pub fn describe_schema() -> Schema {
366
0
        Schema::new(vec![
367
0
            Field::new("column_name", DataType::Utf8, false),
368
0
            Field::new("data_type", DataType::Utf8, false),
369
0
            Field::new("is_nullable", DataType::Utf8, false),
370
0
        ])
371
0
    }
372
373
    /// Returns all expressions (non-recursively) evaluated by the current
374
    /// logical plan node. This does not include expressions in any children.
375
    ///
376
    /// Note this method `clone`s all the expressions. When possible, the
377
    /// [`tree_node`] API should be used instead of this API.
378
    ///
379
    /// The returned expressions do not necessarily represent, or even
380
    /// contribute to, the output schema of this node. For example,
381
    /// `LogicalPlan::Filter` returns the filter expression even though the
382
    /// output of a Filter has the same columns as the input.
383
    ///
384
    /// The expressions do contain all the columns that are used by this plan,
385
    /// so if there are columns not referenced by these expressions then
386
    /// DataFusion's optimizer attempts to optimize them away.
387
    ///
388
    /// [`tree_node`]: crate::logical_plan::tree_node
389
0
    pub fn expressions(self: &LogicalPlan) -> Vec<Expr> {
390
0
        let mut exprs = vec![];
391
0
        self.apply_expressions(|e| {
392
0
            exprs.push(e.clone());
393
0
            Ok(TreeNodeRecursion::Continue)
394
0
        })
395
0
        // closure always returns OK
396
0
        .unwrap();
397
0
        exprs
398
0
    }
399
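A hedged sketch of `expressions()` in action (editor's addition): the root `Filter` node of this illustrative plan reports exactly one expression, its predicate. The table `employee` and column `salary` are assumptions taken from the doc examples above.

```rust
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_expr::{col, lit, table_scan};

fn filter_predicate_is_reported() -> Result<()> {
    let schema = Schema::new(vec![Field::new("salary", DataType::Int32, false)]);
    let plan = table_scan(Some("employee"), &schema, None)?
        .filter(col("salary").gt(lit(1000)))?
        .build()?;
    // Only the Filter's own predicate is returned; expressions of the child
    // TableScan (its pushed-down filters, here none) are not included.
    assert_eq!(plan.expressions().len(), 1);
    Ok(())
}
```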
400
    /// Returns all the out-reference (correlated) expressions (recursively) in the current
401
    /// logical plan node and all of its descendant nodes.
402
0
    pub fn all_out_ref_exprs(self: &LogicalPlan) -> Vec<Expr> {
403
0
        let mut exprs = vec![];
404
0
        self.apply_expressions(|e| {
405
0
            find_out_reference_exprs(e).into_iter().for_each(|e| {
406
0
                if !exprs.contains(&e) {
407
0
                    exprs.push(e)
408
0
                }
409
0
            });
410
0
            Ok(TreeNodeRecursion::Continue)
411
0
        })
412
0
        // closure always returns OK
413
0
        .unwrap();
414
0
        self.inputs()
415
0
            .into_iter()
416
0
            .flat_map(|child| child.all_out_ref_exprs())
417
0
            .for_each(|e| {
418
0
                if !exprs.contains(&e) {
419
0
                    exprs.push(e)
420
0
                }
421
0
            });
422
0
        exprs
423
0
    }
424
425
    #[deprecated(since = "37.0.0", note = "Use `apply_expressions` instead")]
426
0
    pub fn inspect_expressions<F, E>(self: &LogicalPlan, mut f: F) -> Result<(), E>
427
0
    where
428
0
        F: FnMut(&Expr) -> Result<(), E>,
429
0
    {
430
0
        let mut err = Ok(());
431
0
        self.apply_expressions(|e| {
432
0
            if let Err(e) = f(e) {
433
                // save the error for later (it may not be a DataFusionError)
434
0
                err = Err(e);
435
0
                Ok(TreeNodeRecursion::Stop)
436
            } else {
437
0
                Ok(TreeNodeRecursion::Continue)
438
            }
439
0
        })
440
0
        // The closure always returns OK, so this will always succeed too
441
0
        .expect("no way to return error during recursion");
442
0
443
0
        err
444
0
    }
445
446
    /// Returns all inputs / children of this `LogicalPlan` node.
447
    ///
448
    /// Note: this does not include inputs of inputs (grandchildren), nor subqueries.
449
0
    pub fn inputs(&self) -> Vec<&LogicalPlan> {
450
0
        match self {
451
0
            LogicalPlan::Projection(Projection { input, .. }) => vec![input],
452
0
            LogicalPlan::Filter(Filter { input, .. }) => vec![input],
453
0
            LogicalPlan::Repartition(Repartition { input, .. }) => vec![input],
454
0
            LogicalPlan::Window(Window { input, .. }) => vec![input],
455
0
            LogicalPlan::Aggregate(Aggregate { input, .. }) => vec![input],
456
0
            LogicalPlan::Sort(Sort { input, .. }) => vec![input],
457
0
            LogicalPlan::Join(Join { left, right, .. }) => vec![left, right],
458
0
            LogicalPlan::CrossJoin(CrossJoin { left, right, .. }) => vec![left, right],
459
0
            LogicalPlan::Limit(Limit { input, .. }) => vec![input],
460
0
            LogicalPlan::Subquery(Subquery { subquery, .. }) => vec![subquery],
461
0
            LogicalPlan::SubqueryAlias(SubqueryAlias { input, .. }) => vec![input],
462
0
            LogicalPlan::Extension(extension) => extension.node.inputs(),
463
0
            LogicalPlan::Union(Union { inputs, .. }) => {
464
0
                inputs.iter().map(|arc| arc.as_ref()).collect()
465
            }
466
            LogicalPlan::Distinct(
467
0
                Distinct::All(input) | Distinct::On(DistinctOn { input, .. }),
468
0
            ) => vec![input],
469
0
            LogicalPlan::Explain(explain) => vec![&explain.plan],
470
0
            LogicalPlan::Analyze(analyze) => vec![&analyze.input],
471
0
            LogicalPlan::Dml(write) => vec![&write.input],
472
0
            LogicalPlan::Copy(copy) => vec![&copy.input],
473
0
            LogicalPlan::Ddl(ddl) => ddl.inputs(),
474
0
            LogicalPlan::Unnest(Unnest { input, .. }) => vec![input],
475
0
            LogicalPlan::Prepare(Prepare { input, .. }) => vec![input],
476
            LogicalPlan::RecursiveQuery(RecursiveQuery {
477
0
                static_term,
478
0
                recursive_term,
479
0
                ..
480
0
            }) => vec![static_term, recursive_term],
481
            // plans without inputs
482
            LogicalPlan::TableScan { .. }
483
            | LogicalPlan::Statement { .. }
484
            | LogicalPlan::EmptyRelation { .. }
485
            | LogicalPlan::Values { .. }
486
0
            | LogicalPlan::DescribeTable(_) => vec![],
487
        }
488
0
    }
489
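A small hedged example (editor's addition, with an assumed table `t`): a `Filter` has exactly one input, while a `TableScan` is a leaf with none.

```rust
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_expr::{col, lit, table_scan};

fn count_children() -> Result<()> {
    let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    let plan = table_scan(Some("t"), &schema, None)?
        .filter(col("id").gt(lit(1)))?
        .build()?;
    assert_eq!(plan.inputs().len(), 1);             // Filter -> one child (the scan)
    assert_eq!(plan.inputs()[0].inputs().len(), 0); // TableScan -> leaf node
    Ok(())
}
```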
490
    /// Returns all `Using` join columns in a logical plan
491
0
    pub fn using_columns(&self) -> Result<Vec<HashSet<Column>>, DataFusionError> {
492
0
        let mut using_columns: Vec<HashSet<Column>> = vec![];
493
0
494
0
        self.apply_with_subqueries(|plan| {
495
            if let LogicalPlan::Join(Join {
496
                join_constraint: JoinConstraint::Using,
497
0
                on,
498
                ..
499
0
            }) = plan
500
            {
501
                // The join keys in using-join must be columns.
502
0
                let columns =
503
0
                    on.iter().try_fold(HashSet::new(), |mut accumu, (l, r)| {
504
0
                        let Some(l) = l.get_as_join_column() else {
505
0
                            return internal_err!(
506
0
                                "Invalid join key. Expected column, found {l:?}"
507
0
                            );
508
                        };
509
0
                        let Some(r) = r.get_as_join_column() else {
510
0
                            return internal_err!(
511
0
                                "Invalid join key. Expected column, found {r:?}"
512
0
                            );
513
                        };
514
0
                        accumu.insert(l.to_owned());
515
0
                        accumu.insert(r.to_owned());
516
0
                        Result::<_, DataFusionError>::Ok(accumu)
517
0
                    })?;
518
0
                using_columns.push(columns);
519
0
            }
520
0
            Ok(TreeNodeRecursion::Continue)
521
0
        })?;
522
523
0
        Ok(using_columns)
524
0
    }
525
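An illustrative sketch of `using_columns` (editor's addition). It assumes `LogicalPlanBuilder::join_using` taking the right-hand plan, a `JoinType`, and the shared column names; the tables `t1`/`t2` and column `id` are made up for the example.

```rust
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_expr::{table_scan, JoinType};

fn collect_using_columns() -> Result<()> {
    let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    let right = table_scan(Some("t2"), &schema, None)?.build()?;
    let plan = table_scan(Some("t1"), &schema, None)?
        .join_using(right, JoinType::Inner, vec!["id"])?
        .build()?;
    // One USING join in the plan, so one set of join columns comes back
    assert_eq!(plan.using_columns()?.len(), 1);
    Ok(())
}
```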
526
    /// Returns the first output expression of this `LogicalPlan` node.
527
0
    pub fn head_output_expr(&self) -> Result<Option<Expr>> {
528
0
        match self {
529
0
            LogicalPlan::Projection(projection) => {
530
0
                Ok(Some(projection.expr.as_slice()[0].clone()))
531
            }
532
0
            LogicalPlan::Aggregate(agg) => {
533
0
                if agg.group_expr.is_empty() {
534
0
                    Ok(Some(agg.aggr_expr.as_slice()[0].clone()))
535
                } else {
536
0
                    Ok(Some(agg.group_expr.as_slice()[0].clone()))
537
                }
538
            }
539
0
            LogicalPlan::Distinct(Distinct::On(DistinctOn { select_expr, .. })) => {
540
0
                Ok(Some(select_expr[0].clone()))
541
            }
542
0
            LogicalPlan::Filter(Filter { input, .. })
543
0
            | LogicalPlan::Distinct(Distinct::All(input))
544
0
            | LogicalPlan::Sort(Sort { input, .. })
545
0
            | LogicalPlan::Limit(Limit { input, .. })
546
0
            | LogicalPlan::Repartition(Repartition { input, .. })
547
0
            | LogicalPlan::Window(Window { input, .. }) => input.head_output_expr(),
548
            LogicalPlan::Join(Join {
549
0
                left,
550
0
                right,
551
0
                join_type,
552
0
                ..
553
0
            }) => match join_type {
554
                JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => {
555
0
                    if left.schema().fields().is_empty() {
556
0
                        right.head_output_expr()
557
                    } else {
558
0
                        left.head_output_expr()
559
                    }
560
                }
561
0
                JoinType::LeftSemi | JoinType::LeftAnti => left.head_output_expr(),
562
0
                JoinType::RightSemi | JoinType::RightAnti => right.head_output_expr(),
563
            },
564
0
            LogicalPlan::CrossJoin(cross) => {
565
0
                if cross.left.schema().fields().is_empty() {
566
0
                    cross.right.head_output_expr()
567
                } else {
568
0
                    cross.left.head_output_expr()
569
                }
570
            }
571
0
            LogicalPlan::RecursiveQuery(RecursiveQuery { static_term, .. }) => {
572
0
                static_term.head_output_expr()
573
            }
574
0
            LogicalPlan::Union(union) => Ok(Some(Expr::Column(Column::from(
575
0
                union.schema.qualified_field(0),
576
0
            )))),
577
0
            LogicalPlan::TableScan(table) => Ok(Some(Expr::Column(Column::from(
578
0
                table.projected_schema.qualified_field(0),
579
0
            )))),
580
0
            LogicalPlan::SubqueryAlias(subquery_alias) => {
581
0
                let expr_opt = subquery_alias.input.head_output_expr()?;
582
0
                expr_opt
583
0
                    .map(|expr| {
584
0
                        Ok(Expr::Column(create_col_from_scalar_expr(
585
0
                            &expr,
586
0
                            subquery_alias.alias.to_string(),
587
0
                        )?))
588
0
                    })
589
0
                    .map_or(Ok(None), |v| v.map(Some))
590
            }
591
0
            LogicalPlan::Subquery(_) => Ok(None),
592
            LogicalPlan::EmptyRelation(_)
593
            | LogicalPlan::Prepare(_)
594
            | LogicalPlan::Statement(_)
595
            | LogicalPlan::Values(_)
596
            | LogicalPlan::Explain(_)
597
            | LogicalPlan::Analyze(_)
598
            | LogicalPlan::Extension(_)
599
            | LogicalPlan::Dml(_)
600
            | LogicalPlan::Copy(_)
601
            | LogicalPlan::Ddl(_)
602
            | LogicalPlan::DescribeTable(_)
603
0
            | LogicalPlan::Unnest(_) => Ok(None),
604
        }
605
0
    }
606
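Editor's hedged sketch of `head_output_expr` (not part of the original source): for a projection plan the head output expression is the first projected expression. The table and column names are assumptions.

```rust
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_expr::{col, table_scan};

fn first_output_expression() -> Result<()> {
    let schema = Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("name", DataType::Utf8, false),
    ]);
    let plan = table_scan(Some("t"), &schema, None)?
        .project(vec![col("id"), col("name")])?
        .build()?;
    // The head output expression is the first projected expression,
    // i.e. the (normalized) column `t.id`.
    assert!(plan.head_output_expr()?.is_some());
    Ok(())
}
```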
607
    /// Recomputes schema and type information for this LogicalPlan if needed.
608
    ///
609
    /// Some `LogicalPlan`s may need to recompute their schema if the number or
610
    /// type of its expressions has changed (for example due to type
611
    /// coercion). For example, a [`LogicalPlan::Projection`]'s schema depends on
612
    /// its expressions.
613
    ///
614
    /// Some `LogicalPlan`s' schemas are unaffected by any changes to their
615
    /// expressions. For example [`LogicalPlan::Filter`] schema is always the
616
    /// same as its input schema.
617
    ///
618
    /// This is useful after modifying a plan's `Expr`s (or input plans) via
619
    /// methods such as [Self::map_children] and [Self::map_expressions]. Unlike
620
    /// [Self::with_new_exprs], this method does not require a new set of
621
    /// expressions or input plans.
622
    ///
623
    /// # Return value
624
    /// Returns an error if there is some issue recomputing the schema.
625
    ///
626
    /// # Notes
627
    ///
628
    /// * Does not recursively recompute schema for input (child) plans.
629
0
    pub fn recompute_schema(self) -> Result<Self> {
630
0
        match self {
631
            // Since expr may be different from the previous expr, the schema of the projection
632
            // may change. We need to use the try_new method instead of the try_new_with_schema method.
633
            LogicalPlan::Projection(Projection {
634
0
                expr,
635
0
                input,
636
0
                schema: _,
637
0
            }) => Projection::try_new(expr, input).map(LogicalPlan::Projection),
638
0
            LogicalPlan::Dml(_) => Ok(self),
639
0
            LogicalPlan::Copy(_) => Ok(self),
640
0
            LogicalPlan::Values(Values { schema, values }) => {
641
0
                // todo it isn't clear why the schema is not recomputed here
642
0
                Ok(LogicalPlan::Values(Values { schema, values }))
643
            }
644
            LogicalPlan::Filter(Filter {
645
0
                predicate,
646
0
                input,
647
0
                having,
648
0
            }) => Filter::try_new_internal(predicate, input, having)
649
0
                .map(LogicalPlan::Filter),
650
0
            LogicalPlan::Repartition(_) => Ok(self),
651
            LogicalPlan::Window(Window {
652
0
                input,
653
0
                window_expr,
654
0
                schema: _,
655
0
            }) => Window::try_new(window_expr, input).map(LogicalPlan::Window),
656
            LogicalPlan::Aggregate(Aggregate {
657
0
                input,
658
0
                group_expr,
659
0
                aggr_expr,
660
0
                schema: _,
661
0
            }) => Aggregate::try_new(input, group_expr, aggr_expr)
662
0
                .map(LogicalPlan::Aggregate),
663
0
            LogicalPlan::Sort(_) => Ok(self),
664
            LogicalPlan::Join(Join {
665
0
                left,
666
0
                right,
667
0
                filter,
668
0
                join_type,
669
0
                join_constraint,
670
0
                on,
671
0
                schema: _,
672
0
                null_equals_null,
673
            }) => {
674
0
                let schema =
675
0
                    build_join_schema(left.schema(), right.schema(), &join_type)?;
676
677
0
                let new_on: Vec<_> = on
678
0
                    .into_iter()
679
0
                    .map(|equi_expr| {
680
0
                        // The SimplifyExpression rule may add an alias to the equi_expr.
681
0
                        (equi_expr.0.unalias(), equi_expr.1.unalias())
682
0
                    })
683
0
                    .collect();
684
0
685
0
                Ok(LogicalPlan::Join(Join {
686
0
                    left,
687
0
                    right,
688
0
                    join_type,
689
0
                    join_constraint,
690
0
                    on: new_on,
691
0
                    filter,
692
0
                    schema: DFSchemaRef::new(schema),
693
0
                    null_equals_null,
694
0
                }))
695
            }
696
            LogicalPlan::CrossJoin(CrossJoin {
697
0
                left,
698
0
                right,
699
                schema: _,
700
            }) => {
701
0
                let join_schema =
702
0
                    build_join_schema(left.schema(), right.schema(), &JoinType::Inner)?;
703
704
0
                Ok(LogicalPlan::CrossJoin(CrossJoin {
705
0
                    left,
706
0
                    right,
707
0
                    schema: join_schema.into(),
708
0
                }))
709
            }
710
0
            LogicalPlan::Subquery(_) => Ok(self),
711
            LogicalPlan::SubqueryAlias(SubqueryAlias {
712
0
                input,
713
0
                alias,
714
0
                schema: _,
715
0
            }) => SubqueryAlias::try_new(input, alias).map(LogicalPlan::SubqueryAlias),
716
0
            LogicalPlan::Limit(_) => Ok(self),
717
0
            LogicalPlan::Ddl(_) => Ok(self),
718
0
            LogicalPlan::Extension(Extension { node }) => {
719
0
                // todo make an API that does not require cloning
720
0
                // This requires a copy of the extension node's expressions and inputs
721
0
                let expr = node.expressions();
722
0
                let inputs: Vec<_> = node.inputs().into_iter().cloned().collect();
723
0
                Ok(LogicalPlan::Extension(Extension {
724
0
                    node: node.with_exprs_and_inputs(expr, inputs)?,
725
                }))
726
            }
727
0
            LogicalPlan::Union(Union { inputs, schema }) => {
728
0
                let input_schema = inputs[0].schema();
729
                // If inputs are not pruned do not change schema
730
                // TODO this seems wrong (shouldn't we always use the schema of the input?)
731
0
                let schema = if schema.fields().len() == input_schema.fields().len() {
732
0
                    Arc::clone(&schema)
733
                } else {
734
0
                    Arc::clone(input_schema)
735
                };
736
0
                Ok(LogicalPlan::Union(Union { inputs, schema }))
737
            }
738
0
            LogicalPlan::Distinct(distinct) => {
739
0
                let distinct = match distinct {
740
0
                    Distinct::All(input) => Distinct::All(input),
741
                    Distinct::On(DistinctOn {
742
0
                        on_expr,
743
0
                        select_expr,
744
0
                        sort_expr,
745
0
                        input,
746
0
                        schema: _,
747
0
                    }) => Distinct::On(DistinctOn::try_new(
748
0
                        on_expr,
749
0
                        select_expr,
750
0
                        sort_expr,
751
0
                        input,
752
0
                    )?),
753
                };
754
0
                Ok(LogicalPlan::Distinct(distinct))
755
            }
756
0
            LogicalPlan::RecursiveQuery(_) => Ok(self),
757
0
            LogicalPlan::Analyze(_) => Ok(self),
758
0
            LogicalPlan::Explain(_) => Ok(self),
759
0
            LogicalPlan::Prepare(_) => Ok(self),
760
0
            LogicalPlan::TableScan(_) => Ok(self),
761
0
            LogicalPlan::EmptyRelation(_) => Ok(self),
762
0
            LogicalPlan::Statement(_) => Ok(self),
763
0
            LogicalPlan::DescribeTable(_) => Ok(self),
764
            LogicalPlan::Unnest(Unnest {
765
0
                input,
766
0
                exec_columns,
767
0
                options,
768
0
                ..
769
0
            }) => {
770
0
                // Update schema with unnested column type.
771
0
                unnest_with_options(Arc::unwrap_or_clone(input), exec_columns, options)
772
            }
773
        }
774
0
    }
775
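A hedged sketch of the intended workflow (editor's addition): after a node's expressions have been rewritten in place, `recompute_schema` brings the stored schema back in sync. The rewrite step is elided here; only the call pattern is shown, and the plan is an illustrative assumption.

```rust
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_expr::{col, table_scan, LogicalPlan};

fn resync_schema(plan: LogicalPlan) -> Result<LogicalPlan> {
    // ... imagine the plan's expressions were modified via map_expressions here ...
    // Recompute this node's (possibly stale) schema from its current expressions.
    plan.recompute_schema()
}

fn example() -> Result<()> {
    let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    let plan = table_scan(Some("t"), &schema, None)?
        .project(vec![col("id")])?
        .build()?;
    let plan = resync_schema(plan)?;
    assert_eq!(plan.schema().fields().len(), 1);
    Ok(())
}
```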
776
    /// Returns a new `LogicalPlan` based on `self` with inputs and
777
    /// expressions replaced.
778
    ///
779
    /// Note this method creates an entirely new node, which requires a large
780
    /// amount of cloning. When possible, the [`tree_node`] API should be used
781
    /// instead of this API.
782
    ///
783
    /// The exprs are in the same order as the expressions returned
784
    /// by [`Self::expressions`]. This function is used by optimizers
785
    /// to rewrite plans using the following pattern:
786
    ///
787
    /// [`tree_node`]: crate::logical_plan::tree_node
788
    ///
789
    /// ```text
790
    /// let new_inputs = optimize_children(..., plan, props);
791
    ///
792
    /// // get the plans expressions to optimize
793
    /// let exprs = plan.expressions();
794
    ///
795
    /// // potentially rewrite plan expressions
796
    /// let rewritten_exprs = rewrite_exprs(exprs);
797
    ///
798
    /// // create new plan using rewritten_exprs in same position
799
    /// let new_plan = plan.with_new_exprs(rewritten_exprs, new_inputs);
800
    /// ```
801
0
    pub fn with_new_exprs(
802
0
        &self,
803
0
        mut expr: Vec<Expr>,
804
0
        inputs: Vec<LogicalPlan>,
805
0
    ) -> Result<LogicalPlan> {
806
0
        match self {
807
            // Since expr may be different from the previous expr, the schema of the projection
808
            // may change. We need to use the try_new method instead of the try_new_with_schema method.
809
            LogicalPlan::Projection(Projection { .. }) => {
810
0
                let input = self.only_input(inputs)?;
811
0
                Projection::try_new(expr, Arc::new(input)).map(LogicalPlan::Projection)
812
            }
813
            LogicalPlan::Dml(DmlStatement {
814
0
                table_name,
815
0
                table_schema,
816
0
                op,
817
0
                ..
818
0
            }) => {
819
0
                self.assert_no_expressions(expr)?;
820
0
                let input = self.only_input(inputs)?;
821
0
                Ok(LogicalPlan::Dml(DmlStatement::new(
822
0
                    table_name.clone(),
823
0
                    Arc::clone(table_schema),
824
0
                    op.clone(),
825
0
                    Arc::new(input),
826
0
                )))
827
            }
828
            LogicalPlan::Copy(CopyTo {
829
                input: _,
830
0
                output_url,
831
0
                file_type,
832
0
                options,
833
0
                partition_by,
834
0
            }) => {
835
0
                self.assert_no_expressions(expr)?;
836
0
                let input = self.only_input(inputs)?;
837
0
                Ok(LogicalPlan::Copy(CopyTo {
838
0
                    input: Arc::new(input),
839
0
                    output_url: output_url.clone(),
840
0
                    file_type: Arc::clone(file_type),
841
0
                    options: options.clone(),
842
0
                    partition_by: partition_by.clone(),
843
0
                }))
844
            }
845
0
            LogicalPlan::Values(Values { schema, .. }) => {
846
0
                self.assert_no_inputs(inputs)?;
847
0
                Ok(LogicalPlan::Values(Values {
848
0
                    schema: Arc::clone(schema),
849
0
                    values: expr
850
0
                        .chunks_exact(schema.fields().len())
851
0
                        .map(|s| s.to_vec())
852
0
                        .collect(),
853
0
                }))
854
            }
855
            LogicalPlan::Filter { .. } => {
856
0
                let predicate = self.only_expr(expr)?;
857
0
                let input = self.only_input(inputs)?;
858
859
0
                Filter::try_new(predicate, Arc::new(input)).map(LogicalPlan::Filter)
860
            }
861
            LogicalPlan::Repartition(Repartition {
862
0
                partitioning_scheme,
863
0
                ..
864
0
            }) => match partitioning_scheme {
865
0
                Partitioning::RoundRobinBatch(n) => {
866
0
                    self.assert_no_expressions(expr)?;
867
0
                    let input = self.only_input(inputs)?;
868
0
                    Ok(LogicalPlan::Repartition(Repartition {
869
0
                        partitioning_scheme: Partitioning::RoundRobinBatch(*n),
870
0
                        input: Arc::new(input),
871
0
                    }))
872
                }
873
0
                Partitioning::Hash(_, n) => {
874
0
                    let input = self.only_input(inputs)?;
875
0
                    Ok(LogicalPlan::Repartition(Repartition {
876
0
                        partitioning_scheme: Partitioning::Hash(expr, *n),
877
0
                        input: Arc::new(input),
878
0
                    }))
879
                }
880
                Partitioning::DistributeBy(_) => {
881
0
                    let input = self.only_input(inputs)?;
882
0
                    Ok(LogicalPlan::Repartition(Repartition {
883
0
                        partitioning_scheme: Partitioning::DistributeBy(expr),
884
0
                        input: Arc::new(input),
885
0
                    }))
886
                }
887
            },
888
0
            LogicalPlan::Window(Window { window_expr, .. }) => {
889
0
                assert_eq!(window_expr.len(), expr.len());
890
0
                let input = self.only_input(inputs)?;
891
0
                Window::try_new(expr, Arc::new(input)).map(LogicalPlan::Window)
892
            }
893
0
            LogicalPlan::Aggregate(Aggregate { group_expr, .. }) => {
894
0
                let input = self.only_input(inputs)?;
895
                // group exprs are the first expressions
896
0
                let agg_expr = expr.split_off(group_expr.len());
897
0
898
0
                Aggregate::try_new(Arc::new(input), expr, agg_expr)
899
0
                    .map(LogicalPlan::Aggregate)
900
            }
901
            LogicalPlan::Sort(Sort {
902
0
                expr: sort_expr,
903
0
                fetch,
904
                ..
905
            }) => {
906
0
                let input = self.only_input(inputs)?;
907
0
                Ok(LogicalPlan::Sort(Sort {
908
0
                    expr: replace_sort_expressions(sort_expr.clone(), expr),
909
0
                    input: Arc::new(input),
910
0
                    fetch: *fetch,
911
0
                }))
912
            }
913
            LogicalPlan::Join(Join {
914
0
                join_type,
915
0
                join_constraint,
916
0
                on,
917
0
                null_equals_null,
918
                ..
919
            }) => {
920
0
                let (left, right) = self.only_two_inputs(inputs)?;
921
0
                let schema = build_join_schema(left.schema(), right.schema(), join_type)?;
922
923
0
                let equi_expr_count = on.len();
924
0
                assert!(expr.len() >= equi_expr_count);
925
926
                // Assume that the last expr, if any,
927
                // is the filter_expr (non-equality predicate from the ON clause)
928
0
                let filter_expr = if expr.len() > equi_expr_count {
929
0
                    expr.pop()
930
                } else {
931
0
                    None
932
                };
933
934
                // The first part of expr is equi-exprs,
935
                // and each equi-expr has the form `left-expr = right-expr`.
936
0
                assert_eq!(expr.len(), equi_expr_count);
937
0
                let new_on = expr.into_iter().map(|equi_expr| {
938
0
                    // The SimplifyExpression rule may add an alias to the equi_expr.
939
0
                    let unalias_expr = equi_expr.clone().unalias();
940
0
                    if let Expr::BinaryExpr(BinaryExpr { left, op: Operator::Eq, right }) = unalias_expr {
941
0
                        Ok((*left, *right))
942
                    } else {
943
0
                        internal_err!(
944
0
                            "The front part expressions should be an binary equality expression, actual:{equi_expr}"
945
0
                        )
946
                    }
947
0
                }).collect::<Result<Vec<(Expr, Expr)>>>()?;
948
949
0
                Ok(LogicalPlan::Join(Join {
950
0
                    left: Arc::new(left),
951
0
                    right: Arc::new(right),
952
0
                    join_type: *join_type,
953
0
                    join_constraint: *join_constraint,
954
0
                    on: new_on,
955
0
                    filter: filter_expr,
956
0
                    schema: DFSchemaRef::new(schema),
957
0
                    null_equals_null: *null_equals_null,
958
0
                }))
959
            }
960
            LogicalPlan::CrossJoin(_) => {
961
0
                self.assert_no_expressions(expr)?;
962
0
                let (left, right) = self.only_two_inputs(inputs)?;
963
0
                LogicalPlanBuilder::from(left).cross_join(right)?.build()
964
            }
965
            LogicalPlan::Subquery(Subquery {
966
0
                outer_ref_columns, ..
967
0
            }) => {
968
0
                self.assert_no_expressions(expr)?;
969
0
                let input = self.only_input(inputs)?;
970
0
                let subquery = LogicalPlanBuilder::from(input).build()?;
971
0
                Ok(LogicalPlan::Subquery(Subquery {
972
0
                    subquery: Arc::new(subquery),
973
0
                    outer_ref_columns: outer_ref_columns.clone(),
974
0
                }))
975
            }
976
0
            LogicalPlan::SubqueryAlias(SubqueryAlias { alias, .. }) => {
977
0
                self.assert_no_expressions(expr)?;
978
0
                let input = self.only_input(inputs)?;
979
0
                SubqueryAlias::try_new(Arc::new(input), alias.clone())
980
0
                    .map(LogicalPlan::SubqueryAlias)
981
            }
982
0
            LogicalPlan::Limit(Limit { skip, fetch, .. }) => {
983
0
                self.assert_no_expressions(expr)?;
984
0
                let input = self.only_input(inputs)?;
985
0
                Ok(LogicalPlan::Limit(Limit {
986
0
                    skip: *skip,
987
0
                    fetch: *fetch,
988
0
                    input: Arc::new(input),
989
0
                }))
990
            }
991
            LogicalPlan::Ddl(DdlStatement::CreateMemoryTable(CreateMemoryTable {
992
0
                name,
993
0
                if_not_exists,
994
0
                or_replace,
995
0
                column_defaults,
996
0
                ..
997
0
            })) => {
998
0
                self.assert_no_expressions(expr)?;
999
0
                let input = self.only_input(inputs)?;
1000
0
                Ok(LogicalPlan::Ddl(DdlStatement::CreateMemoryTable(
1001
0
                    CreateMemoryTable {
1002
0
                        input: Arc::new(input),
1003
0
                        constraints: Constraints::empty(),
1004
0
                        name: name.clone(),
1005
0
                        if_not_exists: *if_not_exists,
1006
0
                        or_replace: *or_replace,
1007
0
                        column_defaults: column_defaults.clone(),
1008
0
                    },
1009
0
                )))
1010
            }
1011
            LogicalPlan::Ddl(DdlStatement::CreateView(CreateView {
1012
0
                name,
1013
0
                or_replace,
1014
0
                definition,
1015
0
                ..
1016
0
            })) => {
1017
0
                self.assert_no_expressions(expr)?;
1018
0
                let input = self.only_input(inputs)?;
1019
0
                Ok(LogicalPlan::Ddl(DdlStatement::CreateView(CreateView {
1020
0
                    input: Arc::new(input),
1021
0
                    name: name.clone(),
1022
0
                    or_replace: *or_replace,
1023
0
                    definition: definition.clone(),
1024
0
                })))
1025
            }
1026
0
            LogicalPlan::Extension(e) => Ok(LogicalPlan::Extension(Extension {
1027
0
                node: e.node.with_exprs_and_inputs(expr, inputs)?,
1028
            })),
1029
0
            LogicalPlan::Union(Union { schema, .. }) => {
1030
0
                self.assert_no_expressions(expr)?;
1031
0
                let input_schema = inputs[0].schema();
1032
                // If inputs are not pruned do not change schema.
1033
0
                let schema = if schema.fields().len() == input_schema.fields().len() {
1034
0
                    Arc::clone(schema)
1035
                } else {
1036
0
                    Arc::clone(input_schema)
1037
                };
1038
0
                Ok(LogicalPlan::Union(Union {
1039
0
                    inputs: inputs.into_iter().map(Arc::new).collect(),
1040
0
                    schema,
1041
0
                }))
1042
            }
1043
0
            LogicalPlan::Distinct(distinct) => {
1044
0
                let distinct = match distinct {
1045
                    Distinct::All(_) => {
1046
0
                        self.assert_no_expressions(expr)?;
1047
0
                        let input = self.only_input(inputs)?;
1048
0
                        Distinct::All(Arc::new(input))
1049
                    }
1050
                    Distinct::On(DistinctOn {
1051
0
                        on_expr,
1052
0
                        select_expr,
1053
                        ..
1054
                    }) => {
1055
0
                        let input = self.only_input(inputs)?;
1056
0
                        let sort_expr = expr.split_off(on_expr.len() + select_expr.len());
1057
0
                        let select_expr = expr.split_off(on_expr.len());
1058
0
                        assert!(sort_expr.is_empty(), "with_new_exprs for Distinct does not support sort expressions");
1059
0
                        Distinct::On(DistinctOn::try_new(
1060
0
                            expr,
1061
0
                            select_expr,
1062
0
                            None, // no sort expressions accepted
1063
0
                            Arc::new(input),
1064
0
                        )?)
1065
                    }
1066
                };
1067
0
                Ok(LogicalPlan::Distinct(distinct))
1068
            }
1069
            LogicalPlan::RecursiveQuery(RecursiveQuery {
1070
0
                name, is_distinct, ..
1071
0
            }) => {
1072
0
                self.assert_no_expressions(expr)?;
1073
0
                let (static_term, recursive_term) = self.only_two_inputs(inputs)?;
1074
0
                Ok(LogicalPlan::RecursiveQuery(RecursiveQuery {
1075
0
                    name: name.clone(),
1076
0
                    static_term: Arc::new(static_term),
1077
0
                    recursive_term: Arc::new(recursive_term),
1078
0
                    is_distinct: *is_distinct,
1079
0
                }))
1080
            }
1081
0
            LogicalPlan::Analyze(a) => {
1082
0
                self.assert_no_expressions(expr)?;
1083
0
                let input = self.only_input(inputs)?;
1084
0
                Ok(LogicalPlan::Analyze(Analyze {
1085
0
                    verbose: a.verbose,
1086
0
                    schema: Arc::clone(&a.schema),
1087
0
                    input: Arc::new(input),
1088
0
                }))
1089
            }
1090
0
            LogicalPlan::Explain(e) => {
1091
0
                self.assert_no_expressions(expr)?;
1092
0
                let input = self.only_input(inputs)?;
1093
0
                Ok(LogicalPlan::Explain(Explain {
1094
0
                    verbose: e.verbose,
1095
0
                    plan: Arc::new(input),
1096
0
                    stringified_plans: e.stringified_plans.clone(),
1097
0
                    schema: Arc::clone(&e.schema),
1098
0
                    logical_optimization_succeeded: e.logical_optimization_succeeded,
1099
0
                }))
1100
            }
1101
            LogicalPlan::Prepare(Prepare {
1102
0
                name, data_types, ..
1103
0
            }) => {
1104
0
                self.assert_no_expressions(expr)?;
1105
0
                let input = self.only_input(inputs)?;
1106
0
                Ok(LogicalPlan::Prepare(Prepare {
1107
0
                    name: name.clone(),
1108
0
                    data_types: data_types.clone(),
1109
0
                    input: Arc::new(input),
1110
0
                }))
1111
            }
1112
0
            LogicalPlan::TableScan(ts) => {
1113
0
                self.assert_no_inputs(inputs)?;
1114
0
                Ok(LogicalPlan::TableScan(TableScan {
1115
0
                    filters: expr,
1116
0
                    ..ts.clone()
1117
0
                }))
1118
            }
1119
            LogicalPlan::EmptyRelation(_)
1120
            | LogicalPlan::Ddl(_)
1121
            | LogicalPlan::Statement(_)
1122
            | LogicalPlan::DescribeTable(_) => {
1123
                // All of these plan types have no inputs / exprs so should not be called
1124
0
                self.assert_no_expressions(expr)?;
1125
0
                self.assert_no_inputs(inputs)?;
1126
0
                Ok(self.clone())
1127
            }
1128
            LogicalPlan::Unnest(Unnest {
1129
0
                exec_columns: columns,
1130
0
                options,
1131
0
                ..
1132
0
            }) => {
1133
0
                self.assert_no_expressions(expr)?;
1134
0
                let input = self.only_input(inputs)?;
1135
                // Update schema with unnested column type.
1136
0
                let new_plan =
1137
0
                    unnest_with_options(input, columns.clone(), options.clone())?;
1138
0
                Ok(new_plan)
1139
            }
1140
        }
1141
0
    }
1142
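An editor's hedged sketch of the rewrite pattern described in the doc comment on `with_new_exprs`: expressions come back from `expressions()`, are (potentially) rewritten, and are handed back together with cloned children in the same order as `inputs()`. The plan and the replacement predicate are illustrative assumptions.

```rust
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_expr::{col, lit, table_scan, LogicalPlan};

fn rewrite_filter_predicate() -> Result<()> {
    let schema = Schema::new(vec![Field::new("salary", DataType::Int32, false)]);
    let plan = table_scan(Some("employee"), &schema, None)?
        .filter(col("salary").gt(lit(1000)))?
        .build()?;

    // For a Filter there is exactly one expression (the predicate) ...
    let mut exprs = plan.expressions();
    exprs[0] = col("salary").gt(lit(2000)); // "rewrite" it
    // ... and the children are passed back in the same order as `inputs()`.
    let children: Vec<LogicalPlan> = plan.inputs().into_iter().cloned().collect();
    let new_plan = plan.with_new_exprs(exprs, children)?;
    assert_eq!(new_plan.inputs().len(), 1);
    Ok(())
}
```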
1143
    /// Helper for [Self::with_new_exprs] to use when no expressions are expected.
1144
    #[inline]
1145
    #[allow(clippy::needless_pass_by_value)] // expr is moved intentionally to ensure it's not used again
1146
0
    fn assert_no_expressions(&self, expr: Vec<Expr>) -> Result<()> {
1147
0
        if !expr.is_empty() {
1148
0
            return internal_err!("{self:?} should have no exprs, got {:?}", expr);
1149
0
        }
1150
0
        Ok(())
1151
0
    }
1152
1153
    /// Helper for [Self::with_new_exprs] to use when no inputs are expected.
1154
    #[inline]
1155
    #[allow(clippy::needless_pass_by_value)] // inputs is moved intentionally to ensure it's not used again
1156
0
    fn assert_no_inputs(&self, inputs: Vec<LogicalPlan>) -> Result<()> {
1157
0
        if !inputs.is_empty() {
1158
0
            return internal_err!("{self:?} should have no inputs, got: {:?}", inputs);
1159
0
        }
1160
0
        Ok(())
1161
0
    }
1162
1163
    /// Helper for [Self::with_new_exprs] to use when exactly one expression is expected.
1164
    #[inline]
1165
0
    fn only_expr(&self, mut expr: Vec<Expr>) -> Result<Expr> {
1166
0
        if expr.len() != 1 {
1167
0
            return internal_err!(
1168
0
                "{self:?} should have exactly one expr, got {:?}",
1169
0
                expr
1170
0
            );
1171
0
        }
1172
0
        Ok(expr.remove(0))
1173
0
    }
1174
1175
    /// Helper for [Self::with_new_exprs] to use when exactly one input is expected.
1176
    #[inline]
1177
0
    fn only_input(&self, mut inputs: Vec<LogicalPlan>) -> Result<LogicalPlan> {
1178
0
        if inputs.len() != 1 {
1179
0
            return internal_err!(
1180
0
                "{self:?} should have exactly one input, got {:?}",
1181
0
                inputs
1182
0
            );
1183
0
        }
1184
0
        Ok(inputs.remove(0))
1185
0
    }
1186
1187
    /// Helper for [Self::with_new_exprs] to use when exactly two inputs are expected.
1188
    #[inline]
1189
0
    fn only_two_inputs(
1190
0
        &self,
1191
0
        mut inputs: Vec<LogicalPlan>,
1192
0
    ) -> Result<(LogicalPlan, LogicalPlan)> {
1193
0
        if inputs.len() != 2 {
1194
0
            return internal_err!(
1195
0
                "{self:?} should have exactly two inputs, got {:?}",
1196
0
                inputs
1197
0
            );
1198
0
        }
1199
0
        let right = inputs.remove(1);
1200
0
        let left = inputs.remove(0);
1201
0
        Ok((left, right))
1202
0
    }
1203
1204
    /// Replaces placeholder param values (like `$1`, `$2`) in [`LogicalPlan`]
1205
    /// with the specified `param_values`.
1206
    ///
1207
    /// [`LogicalPlan::Prepare`] nodes are
1208
    /// converted to their inner logical plan for execution.
1209
    ///
1210
    /// # Example
1211
    /// ```
1212
    /// # use arrow::datatypes::{Field, Schema, DataType};
1213
    /// use datafusion_common::ScalarValue;
1214
    /// # use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan, placeholder};
1215
    /// # let schema = Schema::new(vec![
1216
    /// #     Field::new("id", DataType::Int32, false),
1217
    /// # ]);
1218
    /// // Build SELECT * FROM t1 WHERE id = $1
1219
    /// let plan = table_scan(Some("t1"), &schema, None).unwrap()
1220
    ///     .filter(col("id").eq(placeholder("$1"))).unwrap()
1221
    ///     .build().unwrap();
1222
    ///
1223
    /// assert_eq!(
1224
    ///   "Filter: t1.id = $1\
1225
    ///   \n  TableScan: t1",
1226
    ///   plan.display_indent().to_string()
1227
    /// );
1228
    ///
1229
    /// // Fill in the parameter $1 with a literal 3
1230
    /// let plan = plan.with_param_values(vec![
1231
    ///   ScalarValue::from(3i32) // value at index 0 --> $1
1232
    /// ]).unwrap();
1233
    ///
1234
    /// assert_eq!(
1235
    ///    "Filter: t1.id = Int32(3)\
1236
    ///    \n  TableScan: t1",
1237
    ///    plan.display_indent().to_string()
1238
    ///  );
1239
    ///
1240
    /// // Note you can also use named parameters
1241
    /// // Build SELECT * FROM t1 WHERE id = $my_param
1242
    /// let plan = table_scan(Some("t1"), &schema, None).unwrap()
1243
    ///     .filter(col("id").eq(placeholder("$my_param"))).unwrap()
1244
    ///     .build().unwrap()
1245
    ///     // Fill in the parameter $my_param with a literal 3
1246
    ///     .with_param_values(vec![
1247
    ///       ("my_param", ScalarValue::from(3i32)),
1248
    ///     ]).unwrap();
1249
    ///
1250
    /// assert_eq!(
1251
    ///    "Filter: t1.id = Int32(3)\
1252
    ///    \n  TableScan: t1",
1253
    ///    plan.display_indent().to_string()
1254
    ///  );
1255
    ///
1256
    /// ```
1257
0
    pub fn with_param_values(
1258
0
        self,
1259
0
        param_values: impl Into<ParamValues>,
1260
0
    ) -> Result<LogicalPlan> {
1261
0
        let param_values = param_values.into();
1262
0
        let plan_with_values = self.replace_params_with_values(&param_values)?;
1263
1264
        // unwrap Prepare
1265
0
        Ok(if let LogicalPlan::Prepare(prepare_lp) = plan_with_values {
1266
0
            param_values.verify(&prepare_lp.data_types)?;
1267
            // try to take ownership of the input if it is not shared, clone otherwise
1268
0
            Arc::unwrap_or_clone(prepare_lp.input)
1269
        } else {
1270
0
            plan_with_values
1271
        })
1272
0
    }
1273
1274
    /// Returns the maximum number of rows that this plan can output, if known.
1275
    ///
1276
    /// If `None`, the plan can return any number of rows.
1277
    /// If `Some(n)` then the plan can return at most `n` rows but may return fewer.
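    ///
    /// # Example
    ///
    /// A minimal sketch, assuming the same single-column `t1` table used in the
    /// other examples in this module; the exact bound depends on the operators in
    /// the plan:
    ///
    /// ```
    /// # use arrow::datatypes::{Field, Schema, DataType};
    /// # use datafusion_expr::logical_plan::table_scan;
    /// # let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    /// let plan = table_scan(Some("t1"), &schema, None).unwrap()
    ///     // Limit the scan to at most 10 rows
    ///     .limit(0, Some(10)).unwrap()
    ///     .build().unwrap();
    /// // The Limit's fetch value bounds the number of output rows
    /// assert_eq!(plan.max_rows(), Some(10));
    /// ```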
1278
0
    pub fn max_rows(self: &LogicalPlan) -> Option<usize> {
1279
0
        match self {
1280
0
            LogicalPlan::Projection(Projection { input, .. }) => input.max_rows(),
1281
0
            LogicalPlan::Filter(filter) => {
1282
0
                if filter.is_scalar() {
1283
0
                    Some(1)
1284
                } else {
1285
0
                    filter.input.max_rows()
1286
                }
1287
            }
1288
0
            LogicalPlan::Window(Window { input, .. }) => input.max_rows(),
1289
            LogicalPlan::Aggregate(Aggregate {
1290
0
                input, group_expr, ..
1291
0
            }) => {
1292
0
                // Empty group_expr will return Some(1)
1293
0
                if group_expr
1294
0
                    .iter()
1295
0
                    .all(|expr| matches!(expr, Expr::Literal(_)))
1296
                {
1297
0
                    Some(1)
1298
                } else {
1299
0
                    input.max_rows()
1300
                }
1301
            }
1302
0
            LogicalPlan::Sort(Sort { input, fetch, .. }) => {
1303
0
                match (fetch, input.max_rows()) {
1304
0
                    (Some(fetch_limit), Some(input_max)) => {
1305
0
                        Some(input_max.min(*fetch_limit))
1306
                    }
1307
0
                    (Some(fetch_limit), None) => Some(*fetch_limit),
1308
0
                    (None, Some(input_max)) => Some(input_max),
1309
0
                    (None, None) => None,
1310
                }
1311
            }
1312
            LogicalPlan::Join(Join {
1313
0
                left,
1314
0
                right,
1315
0
                join_type,
1316
0
                ..
1317
0
            }) => match join_type {
1318
                JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => {
1319
0
                    match (left.max_rows(), right.max_rows()) {
1320
0
                        (Some(left_max), Some(right_max)) => {
1321
0
                            let min_rows = match join_type {
1322
0
                                JoinType::Left => left_max,
1323
0
                                JoinType::Right => right_max,
1324
0
                                JoinType::Full => left_max + right_max,
1325
0
                                _ => 0,
1326
                            };
1327
0
                            Some((left_max * right_max).max(min_rows))
1328
                        }
1329
0
                        _ => None,
1330
                    }
1331
                }
1332
0
                JoinType::LeftSemi | JoinType::LeftAnti => left.max_rows(),
1333
0
                JoinType::RightSemi | JoinType::RightAnti => right.max_rows(),
1334
            },
1335
0
            LogicalPlan::CrossJoin(CrossJoin { left, right, .. }) => {
1336
0
                match (left.max_rows(), right.max_rows()) {
1337
0
                    (Some(left_max), Some(right_max)) => Some(left_max * right_max),
1338
0
                    _ => None,
1339
                }
1340
            }
1341
0
            LogicalPlan::Repartition(Repartition { input, .. }) => input.max_rows(),
1342
0
            LogicalPlan::Union(Union { inputs, .. }) => inputs
1343
0
                .iter()
1344
0
                .map(|plan| plan.max_rows())
1345
0
                .try_fold(0usize, |mut acc, input_max| {
1346
0
                    if let Some(i_max) = input_max {
1347
0
                        acc += i_max;
1348
0
                        Some(acc)
1349
                    } else {
1350
0
                        None
1351
                    }
1352
0
                }),
1353
0
            LogicalPlan::TableScan(TableScan { fetch, .. }) => *fetch,
1354
0
            LogicalPlan::EmptyRelation(_) => Some(0),
1355
0
            LogicalPlan::RecursiveQuery(_) => None,
1356
0
            LogicalPlan::Subquery(_) => None,
1357
0
            LogicalPlan::SubqueryAlias(SubqueryAlias { input, .. }) => input.max_rows(),
1358
0
            LogicalPlan::Limit(Limit { fetch, .. }) => *fetch,
1359
            LogicalPlan::Distinct(
1360
0
                Distinct::All(input) | Distinct::On(DistinctOn { input, .. }),
1361
0
            ) => input.max_rows(),
1362
0
            LogicalPlan::Values(v) => Some(v.values.len()),
1363
0
            LogicalPlan::Unnest(_) => None,
1364
            LogicalPlan::Ddl(_)
1365
            | LogicalPlan::Explain(_)
1366
            | LogicalPlan::Analyze(_)
1367
            | LogicalPlan::Dml(_)
1368
            | LogicalPlan::Copy(_)
1369
            | LogicalPlan::DescribeTable(_)
1370
            | LogicalPlan::Prepare(_)
1371
            | LogicalPlan::Statement(_)
1372
0
            | LogicalPlan::Extension(_) => None,
1373
        }
1374
0
    }
1375
1376
    /// Returns true if this node's expressions contain any references to an outer subquery
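    ///
    /// # Example
    ///
    /// A minimal sketch, assuming `out_ref_col` to build a reference to the `id`
    /// column of a hypothetical outer query aliased `outer`:
    ///
    /// ```
    /// # use arrow::datatypes::{Field, Schema, DataType};
    /// # use datafusion_expr::{col, out_ref_col, logical_plan::table_scan};
    /// # let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    /// let plan = table_scan(Some("t1"), &schema, None).unwrap()
    ///     // Compare the local `id` column against a column of the outer query
    ///     .filter(col("id").eq(out_ref_col(DataType::Int32, "outer.id"))).unwrap()
    ///     .build().unwrap();
    /// assert!(plan.contains_outer_reference());
    /// ```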
1377
0
    pub fn contains_outer_reference(&self) -> bool {
1378
0
        let mut contains = false;
1379
0
        self.apply_expressions(|expr| {
1380
0
            Ok(if expr.contains_outer() {
1381
0
                contains = true;
1382
0
                TreeNodeRecursion::Stop
1383
            } else {
1384
0
                TreeNodeRecursion::Continue
1385
            })
1386
0
        })
1387
0
        .unwrap();
1388
0
        contains
1389
0
    }
1390
1391
    /// Get the output expressions and their corresponding columns.
1392
    ///
1393
    /// The parent node may reference the output columns of the plan by expressions, such as
1394
    /// projection over aggregate or window functions. This method helps to convert the
1395
    /// referenced expressions into columns.
1396
    ///
1397
    /// See also: [`crate::utils::columnize_expr`]
1398
0
    pub fn columnized_output_exprs(&self) -> Result<Vec<(&Expr, Column)>> {
1399
0
        match self {
1400
0
            LogicalPlan::Aggregate(aggregate) => Ok(aggregate
1401
0
                .output_expressions()?
1402
0
                .into_iter()
1403
0
                .zip(self.schema().columns())
1404
0
                .collect()),
1405
            LogicalPlan::Window(Window {
1406
0
                window_expr,
1407
0
                input,
1408
0
                schema,
1409
            }) => {
1410
                // The input could be another Window, so the result should also include the input's. For Example:
1411
                // `EXPLAIN SELECT RANK() OVER (PARTITION BY a ORDER BY b), SUM(b) OVER (PARTITION BY a) FROM t`
1412
                // Its plan is:
1413
                // Projection: RANK() PARTITION BY [t.a] ORDER BY [t.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(t.b) PARTITION BY [t.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
1414
                //   WindowAggr: windowExpr=[[SUM(CAST(t.b AS Int64)) PARTITION BY [t.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
1415
                //     WindowAggr: windowExpr=[[RANK() PARTITION BY [t.a] ORDER BY [t.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
1416
                //       TableScan: t projection=[a, b]
1417
0
                let mut output_exprs = input.columnized_output_exprs()?;
1418
0
                let input_len = input.schema().fields().len();
1419
0
                output_exprs.extend(
1420
0
                    window_expr
1421
0
                        .iter()
1422
0
                        .zip(schema.columns().into_iter().skip(input_len)),
1423
0
                );
1424
0
                Ok(output_exprs)
1425
            }
1426
0
            _ => Ok(vec![]),
1427
        }
1428
0
    }
1429
}
1430
1431
impl LogicalPlan {
1432
    /// Return a `LogicalPlan` with all placeholders (e.g. `$1`, `$2`,
1433
    /// ...) replaced with corresponding values provided in
1434
    /// `params_values`
1435
    ///
1436
    /// See [`Self::with_param_values`] for examples and usage with an owned
1437
    /// `ParamValues`
1438
0
    pub fn replace_params_with_values(
1439
0
        self,
1440
0
        param_values: &ParamValues,
1441
0
    ) -> Result<LogicalPlan> {
1442
0
        self.transform_up_with_subqueries(|plan| {
1443
0
            let schema = Arc::clone(plan.schema());
1444
0
            let name_preserver = NamePreserver::new(&plan);
1445
0
            plan.map_expressions(|e| {
1446
0
                let original_name = name_preserver.save(&e);
1447
0
                let transformed_expr =
1448
0
                    e.infer_placeholder_types(&schema)?.transform_up(|e| {
1449
0
                        if let Expr::Placeholder(Placeholder { id, .. }) = e {
1450
0
                            let value = param_values.get_placeholders_with_values(&id)?;
1451
0
                            Ok(Transformed::yes(Expr::Literal(value)))
1452
                        } else {
1453
0
                            Ok(Transformed::no(e))
1454
                        }
1455
0
                    })?;
1456
                // Preserve name to avoid breaking column references to this expression
1457
0
                Ok(transformed_expr.update_data(|expr| original_name.restore(expr)))
1458
0
            })
1459
0
        })
1460
0
        .map(|res| res.data)
1461
0
    }
1462
1463
    /// Walk the logical plan, find any `Placeholder` tokens, and return a map of their IDs and DataTypes
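    ///
    /// # Example
    ///
    /// A minimal sketch, assuming a placeholder whose type is already known; a
    /// placeholder whose type cannot be determined may not appear in the map:
    ///
    /// ```
    /// # use arrow::datatypes::{Field, Schema, DataType};
    /// # use datafusion_expr::{col, Expr, logical_plan::table_scan};
    /// # use datafusion_expr::expr::Placeholder;
    /// # let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    /// // A `$1` placeholder with an explicit Int32 type
    /// let param = Expr::Placeholder(Placeholder::new("$1".to_string(), Some(DataType::Int32)));
    /// let plan = table_scan(Some("t1"), &schema, None).unwrap()
    ///     .filter(col("id").eq(param)).unwrap()
    ///     .build().unwrap();
    /// let params = plan.get_parameter_types().unwrap();
    /// assert_eq!(params.get("$1"), Some(&Some(DataType::Int32)));
    /// ```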
1464
0
    pub fn get_parameter_types(
1465
0
        &self,
1466
0
    ) -> Result<HashMap<String, Option<DataType>>, DataFusionError> {
1467
0
        let mut param_types: HashMap<String, Option<DataType>> = HashMap::new();
1468
0
1469
0
        self.apply_with_subqueries(|plan| {
1470
0
            plan.apply_expressions(|expr| {
1471
0
                expr.apply(|expr| {
1472
0
                    if let Expr::Placeholder(Placeholder { id, data_type }) = expr {
1473
0
                        let prev = param_types.get(id);
1474
0
                        match (prev, data_type) {
1475
0
                            (Some(Some(prev)), Some(dt)) => {
1476
0
                                if prev != dt {
1477
0
                                    plan_err!("Conflicting types for {id}")?;
1478
0
                                }
1479
                            }
1480
0
                            (_, Some(dt)) => {
1481
0
                                param_types.insert(id.clone(), Some(dt.clone()));
1482
0
                            }
1483
0
                            _ => {}
1484
                        }
1485
0
                    }
1486
0
                    Ok(TreeNodeRecursion::Continue)
1487
0
                })
1488
0
            })
1489
0
        })
1490
0
        .map(|_| param_types)
1491
0
    }
1492
1493
    // ------------
1494
    // Various implementations for printing out LogicalPlans
1495
    // ------------
1496
1497
    /// Return a `format`able structure that produces a single line
1498
    /// per node.
1499
    ///
1500
    /// # Example
1501
    ///
1502
    /// ```text
1503
    /// Projection: employee.id
1504
    ///    Filter: employee.state Eq Utf8(\"CO\")\
1505
    ///       CsvScan: employee projection=Some([0, 3])
1506
    /// ```
1507
    ///
1508
    /// ```
1509
    /// use arrow::datatypes::{Field, Schema, DataType};
1510
    /// use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan};
1511
    /// let schema = Schema::new(vec![
1512
    ///     Field::new("id", DataType::Int32, false),
1513
    /// ]);
1514
    /// let plan = table_scan(Some("t1"), &schema, None).unwrap()
1515
    ///     .filter(col("id").eq(lit(5))).unwrap()
1516
    ///     .build().unwrap();
1517
    ///
1518
    /// // Format using display_indent
1519
    /// let display_string = format!("{}", plan.display_indent());
1520
    ///
1521
    /// assert_eq!("Filter: t1.id = Int32(5)\n  TableScan: t1",
1522
    ///             display_string);
1523
    /// ```
1524
0
    pub fn display_indent(&self) -> impl Display + '_ {
1525
        // Boilerplate structure to wrap LogicalPlan with something
1526
        // that can be formatted
1527
        struct Wrapper<'a>(&'a LogicalPlan);
1528
        impl<'a> Display for Wrapper<'a> {
1529
0
            fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1530
0
                let with_schema = false;
1531
0
                let mut visitor = IndentVisitor::new(f, with_schema);
1532
0
                match self.0.visit_with_subqueries(&mut visitor) {
1533
0
                    Ok(_) => Ok(()),
1534
0
                    Err(_) => Err(fmt::Error),
1535
                }
1536
0
            }
1537
        }
1538
0
        Wrapper(self)
1539
0
    }
1540
1541
    /// Return a `format`able structure that produces a single line
1542
    /// per node that includes the output schema. For example:
1543
    ///
1544
    /// ```text
1545
    /// Projection: employee.id [id:Int32]\
1546
    ///    Filter: employee.state = Utf8(\"CO\") [id:Int32, state:Utf8]\
1547
    ///      TableScan: employee projection=[0, 3] [id:Int32, state:Utf8]";
1548
    /// ```
1549
    ///
1550
    /// ```
1551
    /// use arrow::datatypes::{Field, Schema, DataType};
1552
    /// use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan};
1553
    /// let schema = Schema::new(vec![
1554
    ///     Field::new("id", DataType::Int32, false),
1555
    /// ]);
1556
    /// let plan = table_scan(Some("t1"), &schema, None).unwrap()
1557
    ///     .filter(col("id").eq(lit(5))).unwrap()
1558
    ///     .build().unwrap();
1559
    ///
1560
    /// // Format using display_indent_schema
1561
    /// let display_string = format!("{}", plan.display_indent_schema());
1562
    ///
1563
    /// assert_eq!("Filter: t1.id = Int32(5) [id:Int32]\
1564
    ///             \n  TableScan: t1 [id:Int32]",
1565
    ///             display_string);
1566
    /// ```
1567
0
    pub fn display_indent_schema(&self) -> impl Display + '_ {
1568
        // Boilerplate structure to wrap LogicalPlan with something
1569
        // that can be formatted
1570
        struct Wrapper<'a>(&'a LogicalPlan);
1571
        impl<'a> Display for Wrapper<'a> {
1572
0
            fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1573
0
                let with_schema = true;
1574
0
                let mut visitor = IndentVisitor::new(f, with_schema);
1575
0
                match self.0.visit_with_subqueries(&mut visitor) {
1576
0
                    Ok(_) => Ok(()),
1577
0
                    Err(_) => Err(fmt::Error),
1578
                }
1579
0
            }
1580
        }
1581
0
        Wrapper(self)
1582
0
    }
1583
1584
    /// Return a displayable structure that produces the plan in PostgreSQL JSON format.
1585
    ///
1586
    /// Users can use this format to visualize the plan in existing plan visualization tools, for example [dalibo](https://explain.dalibo.com/)
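    ///
    /// A usage sketch in the style of the other display examples (the JSON output
    /// itself is omitted here):
    ///
    /// ```
    /// use arrow::datatypes::{Field, Schema, DataType};
    /// use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan};
    /// let schema = Schema::new(vec![
    ///     Field::new("id", DataType::Int32, false),
    /// ]);
    /// let plan = table_scan(Some("t1"), &schema, None).unwrap()
    ///     .filter(col("id").eq(lit(5))).unwrap()
    ///     .build().unwrap();
    ///
    /// // Format using display_pg_json
    /// let json_string = format!("{}", plan.display_pg_json());
    /// ```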
1587
0
    pub fn display_pg_json(&self) -> impl Display + '_ {
1588
        // Boilerplate structure to wrap LogicalPlan with something
1589
        // that can be formatted
1590
        struct Wrapper<'a>(&'a LogicalPlan);
1591
        impl<'a> Display for Wrapper<'a> {
1592
0
            fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1593
0
                let mut visitor = PgJsonVisitor::new(f);
1594
0
                visitor.with_schema(true);
1595
0
                match self.0.visit_with_subqueries(&mut visitor) {
1596
0
                    Ok(_) => Ok(()),
1597
0
                    Err(_) => Err(fmt::Error),
1598
                }
1599
0
            }
1600
        }
1601
0
        Wrapper(self)
1602
0
    }
1603
1604
    /// Return a `format`able structure that produces lines meant for
1605
    /// graphical display using the `DOT` language. This format can be
1606
    /// visualized using software from
1607
    /// [`graphviz`](https://graphviz.org/)
1608
    ///
1609
    /// This currently produces two graphs -- one with the basic
1610
    /// structure, and one with additional details such as schema.
1611
    ///
1612
    /// ```
1613
    /// use arrow::datatypes::{Field, Schema, DataType};
1614
    /// use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan};
1615
    /// let schema = Schema::new(vec![
1616
    ///     Field::new("id", DataType::Int32, false),
1617
    /// ]);
1618
    /// let plan = table_scan(Some("t1"), &schema, None).unwrap()
1619
    ///     .filter(col("id").eq(lit(5))).unwrap()
1620
    ///     .build().unwrap();
1621
    ///
1622
    /// // Format using display_graphviz
1623
    /// let graphviz_string = format!("{}", plan.display_graphviz());
1624
    /// ```
1625
    ///
1626
    /// If the graphviz string is saved to a file such as `/tmp/example.dot`, the following
1627
    /// commands can be used to render it as a PDF:
1628
    ///
1629
    /// ```bash
1630
    ///   dot -Tpdf < /tmp/example.dot  > /tmp/example.pdf
1631
    /// ```
1632
    ///
1633
0
    pub fn display_graphviz(&self) -> impl Display + '_ {
1634
        // Boilerplate structure to wrap LogicalPlan with something
1635
        // that can be formatted
1636
        struct Wrapper<'a>(&'a LogicalPlan);
1637
        impl<'a> Display for Wrapper<'a> {
1638
0
            fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1639
0
                let mut visitor = GraphvizVisitor::new(f);
1640
0
1641
0
                visitor.start_graph()?;
1642
1643
0
                visitor.pre_visit_plan("LogicalPlan")?;
1644
0
                self.0
1645
0
                    .visit_with_subqueries(&mut visitor)
1646
0
                    .map_err(|_| fmt::Error)?;
1647
0
                visitor.post_visit_plan()?;
1648
1649
0
                visitor.set_with_schema(true);
1650
0
                visitor.pre_visit_plan("Detailed LogicalPlan")?;
1651
0
                self.0
1652
0
                    .visit_with_subqueries(&mut visitor)
1653
0
                    .map_err(|_| fmt::Error)?;
1654
0
                visitor.post_visit_plan()?;
1655
1656
0
                visitor.end_graph()?;
1657
0
                Ok(())
1658
0
            }
1659
        }
1660
0
        Wrapper(self)
1661
0
    }
1662
1663
    /// Return a `format`able structure with a human readable
1664
    /// description of this LogicalPlan node, not including its
1665
    /// children. For example:
1666
    ///
1667
    /// ```text
1668
    /// Projection: id
1669
    /// ```
1670
    /// ```
1671
    /// use arrow::datatypes::{Field, Schema, DataType};
1672
    /// use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan};
1673
    /// let schema = Schema::new(vec![
1674
    ///     Field::new("id", DataType::Int32, false),
1675
    /// ]);
1676
    /// let plan = table_scan(Some("t1"), &schema, None).unwrap()
1677
    ///     .build().unwrap();
1678
    ///
1679
    /// // Format using display
1680
    /// let display_string = format!("{}", plan.display());
1681
    ///
1682
    /// assert_eq!("TableScan: t1", display_string);
1683
    /// ```
1684
0
    pub fn display(&self) -> impl Display + '_ {
1685
        // Boilerplate structure to wrap LogicalPlan with something
1686
        // that can be formatted
1687
        struct Wrapper<'a>(&'a LogicalPlan);
1688
        impl<'a> Display for Wrapper<'a> {
1689
0
            fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1690
0
                match self.0 {
1691
0
                    LogicalPlan::EmptyRelation(_) => write!(f, "EmptyRelation"),
1692
                    LogicalPlan::RecursiveQuery(RecursiveQuery {
1693
0
                        is_distinct, ..
1694
0
                    }) => {
1695
0
                        write!(f, "RecursiveQuery: is_distinct={}", is_distinct)
1696
                    }
1697
0
                    LogicalPlan::Values(Values { ref values, .. }) => {
1698
0
                        let str_values: Vec<_> = values
1699
0
                            .iter()
1700
0
                            // limit to only 5 values to avoid an overly long display
1701
0
                            .take(5)
1702
0
                            .map(|row| {
1703
0
                                let item = row
1704
0
                                    .iter()
1705
0
                                    .map(|expr| expr.to_string())
1706
0
                                    .collect::<Vec<_>>()
1707
0
                                    .join(", ");
1708
0
                                format!("({item})")
1709
0
                            })
1710
0
                            .collect();
1711
1712
0
                        let ellipsis = if values.len() > 5 { "..." } else { "" };
1713
0
                        write!(f, "Values: {}{}", str_values.join(", "), eclipse)
1714
                    }
1715
1716
                    LogicalPlan::TableScan(TableScan {
1717
0
                        ref source,
1718
0
                        ref table_name,
1719
0
                        ref projection,
1720
0
                        ref filters,
1721
0
                        ref fetch,
1722
                        ..
1723
                    }) => {
1724
0
                        let projected_fields = match projection {
1725
0
                            Some(indices) => {
1726
0
                                let schema = source.schema();
1727
0
                                let names: Vec<&str> = indices
1728
0
                                    .iter()
1729
0
                                    .map(|i| schema.field(*i).name().as_str())
1730
0
                                    .collect();
1731
0
                                format!(" projection=[{}]", names.join(", "))
1732
                            }
1733
0
                            _ => "".to_string(),
1734
                        };
1735
1736
0
                        write!(f, "TableScan: {table_name}{projected_fields}")?;
1737
1738
0
                        if !filters.is_empty() {
1739
0
                            let mut full_filter = vec![];
1740
0
                            let mut partial_filter = vec![];
1741
0
                            let mut unsupported_filters = vec![];
1742
0
                            let filters: Vec<&Expr> = filters.iter().collect();
1743
1744
0
                            if let Ok(results) =
1745
0
                                source.supports_filters_pushdown(&filters)
1746
0
                            {
1747
0
                                filters.iter().zip(results.iter()).for_each(
1748
0
                                    |(x, res)| match res {
1749
                                        TableProviderFilterPushDown::Exact => {
1750
0
                                            full_filter.push(x)
1751
                                        }
1752
                                        TableProviderFilterPushDown::Inexact => {
1753
0
                                            partial_filter.push(x)
1754
                                        }
1755
                                        TableProviderFilterPushDown::Unsupported => {
1756
0
                                            unsupported_filters.push(x)
1757
                                        }
1758
0
                                    },
1759
0
                                );
1760
0
                            }
1761
1762
0
                            if !full_filter.is_empty() {
1763
0
                                write!(
1764
0
                                    f,
1765
0
                                    ", full_filters=[{}]",
1766
0
                                    expr_vec_fmt!(full_filter)
1767
0
                                )?;
1768
0
                            };
1769
0
                            if !partial_filter.is_empty() {
1770
0
                                write!(
1771
0
                                    f,
1772
0
                                    ", partial_filters=[{}]",
1773
0
                                    expr_vec_fmt!(partial_filter)
1774
0
                                )?;
1775
0
                            }
1776
0
                            if !unsupported_filters.is_empty() {
1777
0
                                write!(
1778
0
                                    f,
1779
0
                                    ", unsupported_filters=[{}]",
1780
0
                                    expr_vec_fmt!(unsupported_filters)
1781
0
                                )?;
1782
0
                            }
1783
0
                        }
1784
1785
0
                        if let Some(n) = fetch {
1786
0
                            write!(f, ", fetch={n}")?;
1787
0
                        }
1788
1789
0
                        Ok(())
1790
                    }
1791
0
                    LogicalPlan::Projection(Projection { ref expr, .. }) => {
1792
0
                        write!(f, "Projection: ")?;
1793
0
                        for (i, expr_item) in expr.iter().enumerate() {
1794
0
                            if i > 0 {
1795
0
                                write!(f, ", ")?;
1796
0
                            }
1797
0
                            write!(f, "{expr_item}")?;
1798
                        }
1799
0
                        Ok(())
1800
                    }
1801
0
                    LogicalPlan::Dml(DmlStatement { table_name, op, .. }) => {
1802
0
                        write!(f, "Dml: op=[{op}] table=[{table_name}]")
1803
                    }
1804
                    LogicalPlan::Copy(CopyTo {
1805
                        input: _,
1806
0
                        output_url,
1807
0
                        file_type,
1808
0
                        options,
1809
0
                        ..
1810
0
                    }) => {
1811
0
                        let op_str = options
1812
0
                            .iter()
1813
0
                            .map(|(k, v)| format!("{k} {v}"))
1814
0
                            .collect::<Vec<String>>()
1815
0
                            .join(", ");
1816
0
1817
0
                        write!(f, "CopyTo: format={} output_url={output_url} options: ({op_str})", file_type.get_ext())
1818
                    }
1819
0
                    LogicalPlan::Ddl(ddl) => {
1820
0
                        write!(f, "{}", ddl.display())
1821
                    }
1822
                    LogicalPlan::Filter(Filter {
1823
0
                        predicate: ref expr,
1824
0
                        ..
1825
0
                    }) => write!(f, "Filter: {expr}"),
1826
                    LogicalPlan::Window(Window {
1827
0
                        ref window_expr, ..
1828
0
                    }) => {
1829
0
                        write!(
1830
0
                            f,
1831
0
                            "WindowAggr: windowExpr=[[{}]]",
1832
0
                            expr_vec_fmt!(window_expr)
1833
0
                        )
1834
                    }
1835
                    LogicalPlan::Aggregate(Aggregate {
1836
0
                        ref group_expr,
1837
0
                        ref aggr_expr,
1838
0
                        ..
1839
0
                    }) => write!(
1840
0
                        f,
1841
0
                        "Aggregate: groupBy=[[{}]], aggr=[[{}]]",
1842
0
                        expr_vec_fmt!(group_expr),
1843
0
                        expr_vec_fmt!(aggr_expr)
1844
0
                    ),
1845
0
                    LogicalPlan::Sort(Sort { expr, fetch, .. }) => {
1846
0
                        write!(f, "Sort: ")?;
1847
0
                        for (i, expr_item) in expr.iter().enumerate() {
1848
0
                            if i > 0 {
1849
0
                                write!(f, ", ")?;
1850
0
                            }
1851
0
                            write!(f, "{expr_item}")?;
1852
                        }
1853
0
                        if let Some(a) = fetch {
1854
0
                            write!(f, ", fetch={a}")?;
1855
0
                        }
1856
1857
0
                        Ok(())
1858
                    }
1859
                    LogicalPlan::Join(Join {
1860
0
                        on: ref keys,
1861
0
                        filter,
1862
0
                        join_constraint,
1863
0
                        join_type,
1864
0
                        ..
1865
0
                    }) => {
1866
0
                        let join_expr: Vec<String> =
1867
0
                            keys.iter().map(|(l, r)| format!("{l} = {r}")).collect();
1868
0
                        let filter_expr = filter
1869
0
                            .as_ref()
1870
0
                            .map(|expr| format!(" Filter: {expr}"))
1871
0
                            .unwrap_or_else(|| "".to_string());
1872
0
                        match join_constraint {
1873
                            JoinConstraint::On => {
1874
0
                                write!(
1875
0
                                    f,
1876
0
                                    "{} Join: {}{}",
1877
0
                                    join_type,
1878
0
                                    join_expr.join(", "),
1879
0
                                    filter_expr
1880
0
                                )
1881
                            }
1882
                            JoinConstraint::Using => {
1883
0
                                write!(
1884
0
                                    f,
1885
0
                                    "{} Join: Using {}{}",
1886
0
                                    join_type,
1887
0
                                    join_expr.join(", "),
1888
0
                                    filter_expr,
1889
0
                                )
1890
                            }
1891
                        }
1892
                    }
1893
                    LogicalPlan::CrossJoin(_) => {
1894
0
                        write!(f, "CrossJoin:")
1895
                    }
1896
                    LogicalPlan::Repartition(Repartition {
1897
0
                        partitioning_scheme,
1898
0
                        ..
1899
0
                    }) => match partitioning_scheme {
1900
0
                        Partitioning::RoundRobinBatch(n) => {
1901
0
                            write!(f, "Repartition: RoundRobinBatch partition_count={n}")
1902
                        }
1903
0
                        Partitioning::Hash(expr, n) => {
1904
0
                            let hash_expr: Vec<String> =
1905
0
                                expr.iter().map(|e| format!("{e}")).collect();
1906
0
                            write!(
1907
0
                                f,
1908
0
                                "Repartition: Hash({}) partition_count={}",
1909
0
                                hash_expr.join(", "),
1910
0
                                n
1911
0
                            )
1912
                        }
1913
0
                        Partitioning::DistributeBy(expr) => {
1914
0
                            let dist_by_expr: Vec<String> =
1915
0
                                expr.iter().map(|e| format!("{e}")).collect();
1916
0
                            write!(
1917
0
                                f,
1918
0
                                "Repartition: DistributeBy({})",
1919
0
                                dist_by_expr.join(", "),
1920
0
                            )
1921
                        }
1922
                    },
1923
                    LogicalPlan::Limit(Limit {
1924
0
                        ref skip,
1925
0
                        ref fetch,
1926
0
                        ..
1927
0
                    }) => {
1928
0
                        write!(
1929
0
                            f,
1930
0
                            "Limit: skip={}, fetch={}",
1931
0
                            skip,
1932
0
                            fetch.map_or_else(|| "None".to_string(), |x| x.to_string())
1933
0
                        )
1934
                    }
1935
                    LogicalPlan::Subquery(Subquery { .. }) => {
1936
0
                        write!(f, "Subquery:")
1937
                    }
1938
0
                    LogicalPlan::SubqueryAlias(SubqueryAlias { ref alias, .. }) => {
1939
0
                        write!(f, "SubqueryAlias: {alias}")
1940
                    }
1941
0
                    LogicalPlan::Statement(statement) => {
1942
0
                        write!(f, "{}", statement.display())
1943
                    }
1944
0
                    LogicalPlan::Distinct(distinct) => match distinct {
1945
0
                        Distinct::All(_) => write!(f, "Distinct:"),
1946
                        Distinct::On(DistinctOn {
1947
0
                            on_expr,
1948
0
                            select_expr,
1949
0
                            sort_expr,
1950
0
                            ..
1951
0
                        }) => write!(
1952
0
                            f,
1953
0
                            "DistinctOn: on_expr=[[{}]], select_expr=[[{}]], sort_expr=[[{}]]",
1954
0
                            expr_vec_fmt!(on_expr),
1955
0
                            expr_vec_fmt!(select_expr),
1956
0
                            if let Some(sort_expr) = sort_expr { expr_vec_fmt!(sort_expr) } else { "".to_string() },
1957
                        ),
1958
                    },
1959
0
                    LogicalPlan::Explain { .. } => write!(f, "Explain"),
1960
0
                    LogicalPlan::Analyze { .. } => write!(f, "Analyze"),
1961
0
                    LogicalPlan::Union(_) => write!(f, "Union"),
1962
0
                    LogicalPlan::Extension(e) => e.node.fmt_for_explain(f),
1963
                    LogicalPlan::Prepare(Prepare {
1964
0
                        name, data_types, ..
1965
0
                    }) => {
1966
0
                        write!(f, "Prepare: {name:?} {data_types:?} ")
1967
                    }
1968
                    LogicalPlan::DescribeTable(DescribeTable { .. }) => {
1969
0
                        write!(f, "DescribeTable")
1970
                    }
1971
                    LogicalPlan::Unnest(Unnest {
1972
0
                        input: plan,
1973
0
                        list_type_columns: list_col_indices,
1974
0
                        struct_type_columns: struct_col_indices, .. }) => {
1975
0
                        let input_columns = plan.schema().columns();
1976
0
                        let list_type_columns = list_col_indices
1977
0
                            .iter()
1978
0
                            .map(|(i,unnest_info)|
1979
0
                                format!("{}|depth={}", &input_columns[*i].to_string(),
1980
0
                                unnest_info.depth))
1981
0
                            .collect::<Vec<String>>();
1982
0
                        let struct_type_columns = struct_col_indices
1983
0
                            .iter()
1984
0
                            .map(|i| &input_columns[*i])
1985
0
                            .collect::<Vec<&Column>>();
1986
0
                        // get items from input_columns indexed by list_col_indices
1987
0
                        write!(f, "Unnest: lists[{}] structs[{}]",
1988
0
                        expr_vec_fmt!(list_type_columns),
1989
0
                        expr_vec_fmt!(struct_type_columns))
1990
                    }
1991
                }
1992
0
            }
1993
        }
1994
0
        Wrapper(self)
1995
0
    }
1996
}
1997
1998
impl Display for LogicalPlan {
1999
0
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
2000
0
        self.display_indent().fmt(f)
2001
0
    }
2002
}
2003
2004
impl ToStringifiedPlan for LogicalPlan {
2005
0
    fn to_stringified(&self, plan_type: PlanType) -> StringifiedPlan {
2006
0
        StringifiedPlan::new(plan_type, self.display_indent().to_string())
2007
0
    }
2008
}
2009
2010
/// Produces no rows: An empty relation with an empty schema
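///
/// A minimal sketch of the zero-row case, using an empty schema:
///
/// ```
/// use std::sync::Arc;
/// use datafusion_common::DFSchema;
/// use datafusion_expr::logical_plan::{EmptyRelation, LogicalPlan};
/// let empty = LogicalPlan::EmptyRelation(EmptyRelation {
///     produce_one_row: false,
///     schema: Arc::new(DFSchema::empty()),
/// });
/// // An empty relation is known to produce no rows at all
/// assert_eq!(empty.max_rows(), Some(0));
/// ```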
2011
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2012
pub struct EmptyRelation {
2013
    /// Whether to produce a placeholder row
2014
    pub produce_one_row: bool,
2015
    /// The schema description of the output
2016
    pub schema: DFSchemaRef,
2017
}
2018
2019
// Manual implementation needed because of `schema` field. Comparison excludes this field.
2020
impl PartialOrd for EmptyRelation {
2021
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2022
0
        self.produce_one_row.partial_cmp(&other.produce_one_row)
2023
0
    }
2024
}
2025
2026
/// A variadic query operation, Recursive CTE.
2027
///
2028
/// # Recursive Query Evaluation
2029
///
2030
/// From the [Postgres Docs]:
2031
///
2032
/// 1. Evaluate the non-recursive term. For `UNION` (but not `UNION ALL`),
2033
///    discard duplicate rows. Include all remaining rows in the result of the
2034
///    recursive query, and also place them in a temporary working table.
2035
///
2036
/// 2. So long as the working table is not empty, repeat these steps:
2037
///
2038
/// * Evaluate the recursive term, substituting the current contents of the
2039
///   working table for the recursive self-reference. For `UNION` (but not `UNION
2040
///   ALL`), discard duplicate rows and rows that duplicate any previous result
2041
///   row. Include all remaining rows in the result of the recursive query, and
2042
///   also place them in a temporary intermediate table.
2043
///
2044
/// * Replace the contents of the working table with the contents of the
2045
///   intermediate table, then empty the intermediate table.
2046
///
2047
/// [Postgres Docs]: https://www.postgresql.org/docs/current/queries-with.html#QUERIES-WITH-RECURSIVE
2048
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
2049
pub struct RecursiveQuery {
2050
    /// Name of the query
2051
    pub name: String,
2052
    /// The static term (initial contents of the working table)
2053
    pub static_term: Arc<LogicalPlan>,
2054
    /// The recursive term (evaluated on the contents of the working table until
2055
    /// it returns an empty set)
2056
    pub recursive_term: Arc<LogicalPlan>,
2057
    /// Should the output of the recursive term be deduplicated (`UNION`) or
2058
    /// not (`UNION ALL`).
2059
    pub is_distinct: bool,
2060
}
2061
2062
/// Values expression. See
2063
/// [Postgres VALUES](https://www.postgresql.org/docs/current/queries-values.html)
2064
/// documentation for more details.
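///
/// A minimal sketch using [`LogicalPlanBuilder::values`] with two literal rows; the
/// schema is inferred from the expressions:
///
/// ```
/// use datafusion_expr::{lit, LogicalPlanBuilder};
/// // Two rows of a single Int32 column
/// let plan = LogicalPlanBuilder::values(vec![vec![lit(1)], vec![lit(2)]])
///     .unwrap()
///     .build()
///     .unwrap();
/// ```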
2065
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2066
pub struct Values {
2067
    /// The table schema
2068
    pub schema: DFSchemaRef,
2069
    /// Values
2070
    pub values: Vec<Vec<Expr>>,
2071
}
2072
2073
// Manual implementation needed because of `schema` field. Comparison excludes this field.
2074
impl PartialOrd for Values {
2075
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2076
0
        self.values.partial_cmp(&other.values)
2077
0
    }
2078
}
2079
2080
/// Evaluates an arbitrary list of expressions (essentially a
2081
/// SELECT with an expression list) on its input.
2082
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
2083
// mark non_exhaustive to encourage use of try_new/new()
2084
#[non_exhaustive]
2085
pub struct Projection {
2086
    /// The list of expressions
2087
    pub expr: Vec<Expr>,
2088
    /// The incoming logical plan
2089
    pub input: Arc<LogicalPlan>,
2090
    /// The schema description of the output
2091
    pub schema: DFSchemaRef,
2092
}
2093
2094
// Manual implementation needed because of `schema` field. Comparison excludes this field.
2095
impl PartialOrd for Projection {
2096
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2097
0
        match self.expr.partial_cmp(&other.expr) {
2098
0
            Some(Ordering::Equal) => self.input.partial_cmp(&other.input),
2099
0
            cmp => cmp,
2100
        }
2101
0
    }
2102
}
2103
2104
impl Projection {
2105
    /// Create a new Projection
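    ///
    /// A minimal sketch, projecting the single `id` column of an assumed `t1` scan:
    ///
    /// ```
    /// # use std::sync::Arc;
    /// # use arrow::datatypes::{Field, Schema, DataType};
    /// # use datafusion_expr::{col, logical_plan::{table_scan, Projection}};
    /// # let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    /// let scan = table_scan(Some("t1"), &schema, None).unwrap().build().unwrap();
    /// // The output schema is derived from the projection expressions
    /// let projection = Projection::try_new(vec![col("id")], Arc::new(scan)).unwrap();
    /// ```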
2106
0
    pub fn try_new(expr: Vec<Expr>, input: Arc<LogicalPlan>) -> Result<Self> {
2107
0
        let projection_schema = projection_schema(&input, &expr)?;
2108
0
        Self::try_new_with_schema(expr, input, projection_schema)
2109
0
    }
2110
2111
    /// Create a new Projection using the specified output schema
2112
0
    pub fn try_new_with_schema(
2113
0
        expr: Vec<Expr>,
2114
0
        input: Arc<LogicalPlan>,
2115
0
        schema: DFSchemaRef,
2116
0
    ) -> Result<Self> {
2117
0
        if !expr.iter().any(|e| matches!(e, Expr::Wildcard { .. }))
2118
0
            && expr.len() != schema.fields().len()
2119
        {
2120
0
            return plan_err!("Projection has mismatch between number of expressions ({}) and number of fields in schema ({})", expr.len(), schema.fields().len());
2121
0
        }
2122
0
        Ok(Self {
2123
0
            expr,
2124
0
            input,
2125
0
            schema,
2126
0
        })
2127
0
    }
2128
2129
    /// Create a new Projection that selects every column of the specified output schema
2130
0
    pub fn new_from_schema(input: Arc<LogicalPlan>, schema: DFSchemaRef) -> Self {
2131
0
        let expr: Vec<Expr> = schema.columns().into_iter().map(Expr::Column).collect();
2132
0
        Self {
2133
0
            expr,
2134
0
            input,
2135
0
            schema,
2136
0
        }
2137
0
    }
2138
}
2139
2140
/// Computes the schema of the result produced by applying a projection to the input logical plan.
2141
///
2142
/// # Arguments
2143
///
2144
/// * `input`: A reference to the input `LogicalPlan` for which the projection schema
2145
///   will be computed.
2146
/// * `exprs`: A slice of `Expr` expressions representing the projection operation to apply.
2147
///
2148
/// # Returns
2149
///
2150
/// A `Result` containing an `Arc<DFSchema>` representing the schema of the result
2151
/// produced by the projection operation. If the schema computation is successful,
2152
/// the `Result` will contain the schema; otherwise, it will contain an error.
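///
/// A minimal sketch, assuming this function is re-exported alongside the plan types
/// (adjust the import path if needed) and reusing the `t1` scan from the examples
/// above:
///
/// ```
/// # use arrow::datatypes::{Field, Schema, DataType};
/// # use datafusion_expr::{col, logical_plan::table_scan};
/// # use datafusion_expr::logical_plan::projection_schema;
/// # let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
/// let scan = table_scan(Some("t1"), &schema, None).unwrap().build().unwrap();
/// // Compute the schema that projecting `id` would produce, without building a plan
/// let projected = projection_schema(&scan, &[col("id")]).unwrap();
/// assert_eq!(projected.fields().len(), 1);
/// ```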
2153
0
pub fn projection_schema(input: &LogicalPlan, exprs: &[Expr]) -> Result<Arc<DFSchema>> {
2154
0
    let metadata = input.schema().metadata().clone();
2155
2156
0
    let schema =
2157
0
        DFSchema::new_with_metadata(exprlist_to_fields(exprs, input)?, metadata)?
2158
0
            .with_functional_dependencies(calc_func_dependencies_for_project(
2159
0
                exprs, input,
2160
0
            )?)?;
2161
2162
0
    Ok(Arc::new(schema))
2163
0
}
2164
2165
/// Aliased subquery
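///
/// A minimal sketch via [`SubqueryAlias::try_new`], aliasing an assumed `t1` scan as
/// `t2`:
///
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{Field, Schema, DataType};
/// # use datafusion_expr::logical_plan::{table_scan, SubqueryAlias};
/// # let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
/// let scan = table_scan(Some("t1"), &schema, None).unwrap().build().unwrap();
/// // Re-qualify the input's columns under the alias `t2`
/// let aliased = SubqueryAlias::try_new(Arc::new(scan), "t2").unwrap();
/// ```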
2166
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2167
// mark non_exhaustive to encourage use of try_new/new()
2168
#[non_exhaustive]
2169
pub struct SubqueryAlias {
2170
    /// The incoming logical plan
2171
    pub input: Arc<LogicalPlan>,
2172
    /// The alias for the input relation
2173
    pub alias: TableReference,
2174
    /// The schema with qualified field names
2175
    pub schema: DFSchemaRef,
2176
}
2177
2178
impl SubqueryAlias {
2179
0
    pub fn try_new(
2180
0
        plan: Arc<LogicalPlan>,
2181
0
        alias: impl Into<TableReference>,
2182
0
    ) -> Result<Self> {
2183
0
        let alias = alias.into();
2184
0
        let fields = change_redundant_column(plan.schema().fields());
2185
0
        let meta_data = plan.schema().as_ref().metadata().clone();
2186
0
        let schema: Schema =
2187
0
            DFSchema::from_unqualified_fields(fields.into(), meta_data)?.into();
2188
0
        // Since schema is the same, other than qualifier, we can use existing
2189
0
        // functional dependencies:
2190
0
        let func_dependencies = plan.schema().functional_dependencies().clone();
2191
0
        let schema = DFSchemaRef::new(
2192
0
            DFSchema::try_from_qualified_schema(alias.clone(), &schema)?
2193
0
                .with_functional_dependencies(func_dependencies)?,
2194
        );
2195
0
        Ok(SubqueryAlias {
2196
0
            input: plan,
2197
0
            alias,
2198
0
            schema,
2199
0
        })
2200
0
    }
2201
}
2202
2203
// Manual implementation needed because of `schema` field. Comparison excludes this field.
2204
impl PartialOrd for SubqueryAlias {
2205
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2206
0
        match self.input.partial_cmp(&other.input) {
2207
0
            Some(Ordering::Equal) => self.alias.partial_cmp(&other.alias),
2208
0
            cmp => cmp,
2209
        }
2210
0
    }
2211
}
2212
2213
/// Filters rows from its input that do not match an
2214
/// expression (essentially a WHERE clause with a predicate
2215
/// expression).
2216
///
2217
/// Semantically, `<predicate>` is evaluated for each row of the input;
2218
/// If the value of `<predicate>` is true, the input row is passed to
2219
/// the output. If the value of `<predicate>` is false, the row is
2220
/// discarded.
2221
///
2222
/// A Filter should not be created directly; use `try_new()` instead. Note
2223
/// that these fields are only `pub` to support pattern matching.
2224
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
2225
#[non_exhaustive]
2226
pub struct Filter {
2227
    /// The predicate expression, which must have Boolean type.
2228
    pub predicate: Expr,
2229
    /// The incoming logical plan
2230
    pub input: Arc<LogicalPlan>,
2231
    /// The flag to indicate if the filter is a having clause
2232
    pub having: bool,
2233
}
2234
2235
impl Filter {
2236
    /// Create a new filter operator.
2237
    ///
2238
    /// Note: as aliases have no effect on the output of a filter operator,
2239
    /// they are removed from the predicate expression.
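    ///
    /// A minimal sketch, filtering an assumed `t1` scan with a boolean predicate:
    ///
    /// ```
    /// # use std::sync::Arc;
    /// # use arrow::datatypes::{Field, Schema, DataType};
    /// # use datafusion_expr::{col, lit, logical_plan::{table_scan, Filter}};
    /// # let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    /// let scan = table_scan(Some("t1"), &schema, None).unwrap().build().unwrap();
    /// // The predicate must evaluate to a boolean (or NULL) type
    /// let filter = Filter::try_new(col("id").eq(lit(5)), Arc::new(scan)).unwrap();
    /// ```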
2240
0
    pub fn try_new(predicate: Expr, input: Arc<LogicalPlan>) -> Result<Self> {
2241
0
        Self::try_new_internal(predicate, input, false)
2242
0
    }
2243
2244
    /// Create a new filter operator for a having clause.
2245
    /// This is similar to a filter, but its having flag is set to true.
2246
0
    pub fn try_new_with_having(predicate: Expr, input: Arc<LogicalPlan>) -> Result<Self> {
2247
0
        Self::try_new_internal(predicate, input, true)
2248
0
    }
2249
2250
0
    fn is_allowed_filter_type(data_type: &DataType) -> bool {
2251
0
        match data_type {
2252
            // Interpret NULL as a missing boolean value.
2253
0
            DataType::Boolean | DataType::Null => true,
2254
0
            DataType::Dictionary(_, value_type) => {
2255
0
                Filter::is_allowed_filter_type(value_type.as_ref())
2256
            }
2257
0
            _ => false,
2258
        }
2259
0
    }
2260
2261
0
    fn try_new_internal(
2262
0
        predicate: Expr,
2263
0
        input: Arc<LogicalPlan>,
2264
0
        having: bool,
2265
0
    ) -> Result<Self> {
2266
        // Filter predicates must return a boolean value so we try and validate that here.
2267
        // Note that it is not always possible to resolve the predicate expression during plan
2268
        // construction (such as with correlated subqueries) so we make a best effort here and
2269
        // ignore errors resolving the expression against the schema.
2270
0
        if let Ok(predicate_type) = predicate.get_type(input.schema()) {
2271
0
            if !Filter::is_allowed_filter_type(&predicate_type) {
2272
0
                return plan_err!(
2273
0
                    "Cannot create filter with non-boolean predicate '{predicate}' returning {predicate_type}"
2274
0
                );
2275
0
            }
2276
0
        }
2277
2278
0
        Ok(Self {
2279
0
            predicate: predicate.unalias_nested().data,
2280
0
            input,
2281
0
            having,
2282
0
        })
2283
0
    }
2284
2285
    /// Is this filter guaranteed to return 0 or 1 row in a given instantiation?
2286
    ///
2287
    /// This function will return `true` if its predicate contains a conjunction of
2288
    /// `col(a) = <expr>` terms, where the schema has a unique key that is covered
2289
    /// by this conjunction.
2290
    ///
2291
    /// For example, for the table:
2292
    /// ```sql
2293
    /// CREATE TABLE t (a INTEGER PRIMARY KEY, b INTEGER);
2294
    /// ```
2295
    /// `Filter(a = 2).is_scalar() == true`
2296
    /// , whereas
2297
    /// `Filter(b = 2).is_scalar() == false`
2298
    /// and
2299
    /// `Filter(a = 2 OR b = 2).is_scalar() == false`
2300
0
    fn is_scalar(&self) -> bool {
2301
0
        let schema = self.input.schema();
2302
0
2303
0
        let functional_dependencies = self.input.schema().functional_dependencies();
2304
0
        let unique_keys = functional_dependencies.iter().filter(|dep| {
2305
0
            let nullable = dep.nullable
2306
0
                && dep
2307
0
                    .source_indices
2308
0
                    .iter()
2309
0
                    .any(|&source| schema.field(source).is_nullable());
2310
0
            !nullable
2311
0
                && dep.mode == Dependency::Single
2312
0
                && dep.target_indices.len() == schema.fields().len()
2313
0
        });
2314
0
2315
0
        let exprs = split_conjunction(&self.predicate);
2316
0
        let eq_pred_cols: HashSet<_> = exprs
2317
0
            .iter()
2318
0
            .filter_map(|expr| {
2319
                let Expr::BinaryExpr(BinaryExpr {
2320
0
                    left,
2321
0
                    op: Operator::Eq,
2322
0
                    right,
2323
0
                }) = expr
2324
                else {
2325
0
                    return None;
2326
                };
2327
                // This is a no-op filter expression
2328
0
                if left == right {
2329
0
                    return None;
2330
0
                }
2331
0
2332
0
                match (left.as_ref(), right.as_ref()) {
2333
0
                    (Expr::Column(_), Expr::Column(_)) => None,
2334
0
                    (Expr::Column(c), _) | (_, Expr::Column(c)) => {
2335
0
                        Some(schema.index_of_column(c).unwrap())
2336
                    }
2337
0
                    _ => None,
2338
                }
2339
0
            })
2340
0
            .collect();
2341
2342
        // If we have a functional dependence that is a subset of our predicate,
2343
        // this filter is scalar
2344
0
        for key in unique_keys {
2345
0
            if key.source_indices.iter().all(|c| eq_pred_cols.contains(c)) {
2346
0
                return true;
2347
0
            }
2348
        }
2349
0
        false
2350
0
    }
2351
}
2352
2353
/// Windows its input based on a set of window specs and window functions (e.g. SUM or RANK)
2354
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2355
pub struct Window {
2356
    /// The incoming logical plan
2357
    pub input: Arc<LogicalPlan>,
2358
    /// The window function expression
2359
    pub window_expr: Vec<Expr>,
2360
    /// The schema description of the window output
2361
    pub schema: DFSchemaRef,
2362
}
2363
2364
impl Window {
2365
    /// Create a new window operator.
2366
0
    pub fn try_new(window_expr: Vec<Expr>, input: Arc<LogicalPlan>) -> Result<Self> {
2367
0
        let fields: Vec<(Option<TableReference>, Arc<Field>)> = input
2368
0
            .schema()
2369
0
            .iter()
2370
0
            .map(|(q, f)| (q.cloned(), Arc::clone(f)))
2371
0
            .collect();
2372
0
        let input_len = fields.len();
2373
0
        let mut window_fields = fields;
2374
0
        let expr_fields = exprlist_to_fields(window_expr.as_slice(), &input)?;
2375
0
        window_fields.extend_from_slice(expr_fields.as_slice());
2376
0
        let metadata = input.schema().metadata().clone();
2377
0
2378
0
        // Update functional dependencies for window:
2379
0
        let mut window_func_dependencies =
2380
0
            input.schema().functional_dependencies().clone();
2381
0
        window_func_dependencies.extend_target_indices(window_fields.len());
2382
0
2383
0
        // Since we know that ROW_NUMBER outputs will be unique (i.e. it consists
2384
0
        // of consecutive numbers per partition), we can represent this fact with
2385
0
        // functional dependencies.
2386
0
        let mut new_dependencies = window_expr
2387
0
            .iter()
2388
0
            .enumerate()
2389
0
            .filter_map(|(idx, expr)| {
2390
                if let Expr::WindowFunction(WindowFunction {
2391
0
                    fun: WindowFunctionDefinition::WindowUDF(udwf),
2392
0
                    partition_by,
2393
                    ..
2394
0
                }) = expr
2395
                {
2396
                    // When there is no PARTITION BY, row number will be unique
2397
                    // across the entire table.
2398
0
                    if udwf.name() == "row_number" && partition_by.is_empty() {
2399
0
                        return Some(idx + input_len);
2400
0
                    }
2401
0
                }
2402
0
                None
2403
0
            })
2404
0
            .map(|idx| {
2405
0
                FunctionalDependence::new(vec![idx], vec![], false)
2406
0
                    .with_mode(Dependency::Single)
2407
0
            })
2408
0
            .collect::<Vec<_>>();
2409
0
2410
0
        if !new_dependencies.is_empty() {
2411
0
            for dependence in new_dependencies.iter_mut() {
2412
0
                dependence.target_indices = (0..window_fields.len()).collect();
2413
0
            }
2414
            // Add the dependency introduced because of ROW_NUMBER window function to the functional dependency
2415
0
            let new_deps = FunctionalDependencies::new(new_dependencies);
2416
0
            window_func_dependencies.extend(new_deps);
2417
0
        }
2418
2419
        Self::try_new_with_schema(
2420
0
            window_expr,
2421
0
            input,
2422
0
            Arc::new(
2423
0
                DFSchema::new_with_metadata(window_fields, metadata)?
2424
0
                    .with_functional_dependencies(window_func_dependencies)?,
2425
            ),
2426
        )
2427
0
    }
2428
2429
0
    pub fn try_new_with_schema(
2430
0
        window_expr: Vec<Expr>,
2431
0
        input: Arc<LogicalPlan>,
2432
0
        schema: DFSchemaRef,
2433
0
    ) -> Result<Self> {
2434
0
        if window_expr.len() != schema.fields().len() - input.schema().fields().len() {
2435
0
            return plan_err!(
2436
0
                "Window has mismatch between number of expressions ({}) and number of fields in schema ({})",
2437
0
                window_expr.len(),
2438
0
                schema.fields().len() - input.schema().fields().len()
2439
0
            );
2440
0
        }
2441
0
2442
0
        Ok(Window {
2443
0
            input,
2444
0
            window_expr,
2445
0
            schema,
2446
0
        })
2447
0
    }
2448
}
2449
2450
// Manual implementation needed because of `schema` field. Comparison excludes this field.
2451
impl PartialOrd for Window {
2452
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2453
0
        match self.input.partial_cmp(&other.input) {
2454
0
            Some(Ordering::Equal) => self.window_expr.partial_cmp(&other.window_expr),
2455
0
            cmp => cmp,
2456
        }
2457
0
    }
2458
}
2459
2460
/// Produces rows from a table provider by reference or from the context
2461
#[derive(Clone)]
2462
pub struct TableScan {
2463
    /// The name of the table
2464
    pub table_name: TableReference,
2465
    /// The source of the table
2466
    pub source: Arc<dyn TableSource>,
2467
    /// Optional column indices to use as a projection
2468
    pub projection: Option<Vec<usize>>,
2469
    /// The schema description of the output
2470
    pub projected_schema: DFSchemaRef,
2471
    /// Optional expressions to be used as filters by the table provider
2472
    pub filters: Vec<Expr>,
2473
    /// Optional number of rows to read
2474
    pub fetch: Option<usize>,
2475
}
2476
2477
impl Debug for TableScan {
2478
0
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
2479
0
        f.debug_struct("TableScan")
2480
0
            .field("table_name", &self.table_name)
2481
0
            .field("source", &"...")
2482
0
            .field("projection", &self.projection)
2483
0
            .field("projected_schema", &self.projected_schema)
2484
0
            .field("filters", &self.filters)
2485
0
            .field("fetch", &self.fetch)
2486
0
            .finish_non_exhaustive()
2487
0
    }
2488
}
2489
2490
impl PartialEq for TableScan {
2491
0
    fn eq(&self, other: &Self) -> bool {
2492
0
        self.table_name == other.table_name
2493
0
            && self.projection == other.projection
2494
0
            && self.projected_schema == other.projected_schema
2495
0
            && self.filters == other.filters
2496
0
            && self.fetch == other.fetch
2497
0
    }
2498
}
2499
2500
impl Eq for TableScan {}
2501
2502
// Manual implementation needed because of `source` and `projected_schema` fields.
2503
// Comparison excludes these fields.
2504
impl PartialOrd for TableScan {
2505
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2506
        #[derive(PartialEq, PartialOrd)]
2507
        struct ComparableTableScan<'a> {
2508
            /// The name of the table
2509
            pub table_name: &'a TableReference,
2510
            /// Optional column indices to use as a projection
2511
            pub projection: &'a Option<Vec<usize>>,
2512
            /// Optional expressions to be used as filters by the table provider
2513
            pub filters: &'a Vec<Expr>,
2514
            /// Optional number of rows to read
2515
            pub fetch: &'a Option<usize>,
2516
        }
2517
0
        let comparable_self = ComparableTableScan {
2518
0
            table_name: &self.table_name,
2519
0
            projection: &self.projection,
2520
0
            filters: &self.filters,
2521
0
            fetch: &self.fetch,
2522
0
        };
2523
0
        let comparable_other = ComparableTableScan {
2524
0
            table_name: &other.table_name,
2525
0
            projection: &other.projection,
2526
0
            filters: &other.filters,
2527
0
            fetch: &other.fetch,
2528
0
        };
2529
0
        comparable_self.partial_cmp(&comparable_other)
2530
0
    }
2531
}
2532
2533
impl Hash for TableScan {
2534
0
    fn hash<H: Hasher>(&self, state: &mut H) {
2535
0
        self.table_name.hash(state);
2536
0
        self.projection.hash(state);
2537
0
        self.projected_schema.hash(state);
2538
0
        self.filters.hash(state);
2539
0
        self.fetch.hash(state);
2540
0
    }
2541
}
2542
2543
impl TableScan {
2544
    /// Initialize TableScan with appropriate schema from the given
2545
    /// arguments.
2546
0
    pub fn try_new(
2547
0
        table_name: impl Into<TableReference>,
2548
0
        table_source: Arc<dyn TableSource>,
2549
0
        projection: Option<Vec<usize>>,
2550
0
        filters: Vec<Expr>,
2551
0
        fetch: Option<usize>,
2552
0
    ) -> Result<Self> {
2553
0
        let table_name = table_name.into();
2554
0
2555
0
        if table_name.table().is_empty() {
2556
0
            return plan_err!("table_name cannot be empty");
2557
0
        }
2558
0
        let schema = table_source.schema();
2559
0
        let func_dependencies = FunctionalDependencies::new_from_constraints(
2560
0
            table_source.constraints(),
2561
0
            schema.fields.len(),
2562
0
        );
2563
0
        let projected_schema = projection
2564
0
            .as_ref()
2565
0
            .map(|p| {
2566
0
                let projected_func_dependencies =
2567
0
                    func_dependencies.project_functional_dependencies(p, p.len());
2568
2569
0
                let df_schema = DFSchema::new_with_metadata(
2570
0
                    p.iter()
2571
0
                        .map(|i| {
2572
0
                            (Some(table_name.clone()), Arc::new(schema.field(*i).clone()))
2573
0
                        })
2574
0
                        .collect(),
2575
0
                    schema.metadata.clone(),
2576
0
                )?;
2577
0
                df_schema.with_functional_dependencies(projected_func_dependencies)
2578
0
            })
2579
0
            .unwrap_or_else(|| {
2580
0
                let df_schema =
2581
0
                    DFSchema::try_from_qualified_schema(table_name.clone(), &schema)?;
2582
0
                df_schema.with_functional_dependencies(func_dependencies)
2583
0
            })?;
2584
0
        let projected_schema = Arc::new(projected_schema);
2585
0
2586
0
        Ok(Self {
2587
0
            table_name,
2588
0
            source: table_source,
2589
0
            projection,
2590
0
            projected_schema,
2591
0
            filters,
2592
0
            fetch,
2593
0
        })
2594
0
    }
2595
}
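For orientation, here is a hedged sketch of calling `TableScan::try_new` directly, using the `LogicalTableSource` helper that this file's tests import; the table name, columns, and projection are invented for illustration, and `LogicalTableSource::new` is assumed to wrap an Arrow schema:

```rust
use std::sync::Arc;
use arrow::datatypes::{DataType, Field, Schema};
use crate::builder::LogicalTableSource;

/// Sketch only: scan `employee_csv`, keeping column 0 (`id`), with no
/// pushed-down filters and no fetch limit.
fn table_scan_sketch() -> Result<TableScan> {
    let schema = Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("state", DataType::Utf8, false),
    ]);
    let source = Arc::new(LogicalTableSource::new(Arc::new(schema)));
    TableScan::try_new("employee_csv", source, Some(vec![0]), vec![], None)
}
```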
2596
2597
/// Apply Cross Join to two logical plans
2598
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2599
pub struct CrossJoin {
2600
    /// Left input
2601
    pub left: Arc<LogicalPlan>,
2602
    /// Right input
2603
    pub right: Arc<LogicalPlan>,
2604
    /// The output schema, containing fields from the left and right inputs
2605
    pub schema: DFSchemaRef,
2606
}
2607
2608
// Manual implementation needed because of `schema` field. Comparison excludes this field.
2609
impl PartialOrd for CrossJoin {
2610
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2611
0
        match self.left.partial_cmp(&other.left) {
2612
0
            Some(Ordering::Equal) => self.right.partial_cmp(&other.right),
2613
0
            cmp => cmp,
2614
        }
2615
0
    }
2616
}
2617
2618
/// Repartition the plan based on a partitioning scheme.
2619
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
2620
pub struct Repartition {
2621
    /// The incoming logical plan
2622
    pub input: Arc<LogicalPlan>,
2623
    /// The partitioning scheme
2624
    pub partitioning_scheme: Partitioning,
2625
}
2626
2627
/// Union multiple inputs
2628
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2629
pub struct Union {
2630
    /// Inputs to merge
2631
    pub inputs: Vec<Arc<LogicalPlan>>,
2632
    /// Union schema. Should be the same for all inputs.
2633
    pub schema: DFSchemaRef,
2634
}
2635
2636
// Manual implementation needed because of `schema` field. Comparison excludes this field.
2637
impl PartialOrd for Union {
2638
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2639
0
        self.inputs.partial_cmp(&other.inputs)
2640
0
    }
2641
}
2642
2643
/// Prepare a statement but do not execute it. Prepare statements can have 0 or more
2644
/// `Expr::Placeholder` expressions that are filled in during execution
2645
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
2646
pub struct Prepare {
2647
    /// The name of the statement
2648
    pub name: String,
2649
    /// Data types of the parameters ([`Expr::Placeholder`])
2650
    pub data_types: Vec<DataType>,
2651
    /// The logical plan of the statements
2652
    pub input: Arc<LogicalPlan>,
2653
}
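A hedged sketch of assembling a `Prepare` node by hand, reusing the `table_scan`, `col`, and `placeholder` helpers that appear in this file's test module; the statement name and parameter type are made up for illustration:

```rust
use std::sync::Arc;
use arrow::datatypes::{DataType, Field, Schema};
use crate::logical_plan::table_scan;
use crate::{col, placeholder};

/// Sketch only: roughly `PREPARE my_query (INT) AS SELECT id FROM t WHERE id = $1`.
fn prepare_sketch() -> Result<Prepare> {
    let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    let input = table_scan(TableReference::none(), &schema, None)?
        .filter(col("id").eq(placeholder("$1")))?
        .build()?;
    Ok(Prepare {
        name: "my_query".to_string(),      // hypothetical statement name
        data_types: vec![DataType::Int32], // the type of parameter `$1`
        input: Arc::new(input),
    })
}
```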
2654
2655
/// Describe the schema of a table
2656
///
2657
/// # Example output:
2658
///
2659
/// ```sql
2660
/// > describe traces;
2661
/// +--------------------+-----------------------------+-------------+
2662
/// | column_name        | data_type                   | is_nullable |
2663
/// +--------------------+-----------------------------+-------------+
2664
/// | attributes         | Utf8                        | YES         |
2665
/// | duration_nano      | Int64                       | YES         |
2666
/// | end_time_unix_nano | Int64                       | YES         |
2667
/// | service.name       | Dictionary(Int32, Utf8)     | YES         |
2668
/// | span.kind          | Utf8                        | YES         |
2669
/// | span.name          | Utf8                        | YES         |
2670
/// | span_id            | Dictionary(Int32, Utf8)     | YES         |
2671
/// | time               | Timestamp(Nanosecond, None) | NO          |
2672
/// | trace_id           | Dictionary(Int32, Utf8)     | YES         |
2673
/// | otel.status_code   | Utf8                        | YES         |
2674
/// | parent_span_id     | Utf8                        | YES         |
2675
/// +--------------------+-----------------------------+-------------+
2676
/// ```
2677
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2678
pub struct DescribeTable {
2679
    /// Table schema
2680
    pub schema: Arc<Schema>,
2681
    /// Schema of the describe table output
2682
    pub output_schema: DFSchemaRef,
2683
}
2684
2685
// Manual implementation of `PartialOrd`, returning none since there are no comparable types in
2686
// `DescribeTable`. This allows `LogicalPlan` to derive `PartialOrd`.
2687
impl PartialOrd for DescribeTable {
2688
0
    fn partial_cmp(&self, _other: &Self) -> Option<Ordering> {
2689
0
        // There is no relevant comparison for schemas
2690
0
        None
2691
0
    }
2692
}
2693
2694
/// Produces a relation with string representations of
2695
/// various parts of the plan
2696
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2697
pub struct Explain {
2698
    /// Should extra (detailed, intermediate plans) be included?
2699
    pub verbose: bool,
2700
    /// The logical plan that is being EXPLAIN'd
2701
    pub plan: Arc<LogicalPlan>,
2702
    /// Represent the various stages plans have gone through
2703
    pub stringified_plans: Vec<StringifiedPlan>,
2704
    /// The output schema of the explain (2 columns of text)
2705
    pub schema: DFSchemaRef,
2706
    /// Used by the physical planner to check if it should proceed with planning
2707
    pub logical_optimization_succeeded: bool,
2708
}
2709
2710
// Manual implementation needed because of `schema` field. Comparison excludes this field.
2711
impl PartialOrd for Explain {
2712
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2713
        #[derive(PartialEq, PartialOrd)]
2714
        struct ComparableExplain<'a> {
2715
            /// Should extra (detailed, intermediate plans) be included?
2716
            pub verbose: &'a bool,
2717
            /// The logical plan that is being EXPLAIN'd
2718
            pub plan: &'a Arc<LogicalPlan>,
2719
            /// Represent the various stages plans have gone through
2720
            pub stringified_plans: &'a Vec<StringifiedPlan>,
2721
            /// Used by physical planner to check if should proceed with planning
2722
            pub logical_optimization_succeeded: &'a bool,
2723
        }
2724
0
        let comparable_self = ComparableExplain {
2725
0
            verbose: &self.verbose,
2726
0
            plan: &self.plan,
2727
0
            stringified_plans: &self.stringified_plans,
2728
0
            logical_optimization_succeeded: &self.logical_optimization_succeeded,
2729
0
        };
2730
0
        let comparable_other = ComparableExplain {
2731
0
            verbose: &other.verbose,
2732
0
            plan: &other.plan,
2733
0
            stringified_plans: &other.stringified_plans,
2734
0
            logical_optimization_succeeded: &other.logical_optimization_succeeded,
2735
0
        };
2736
0
        comparable_self.partial_cmp(&comparable_other)
2737
0
    }
2738
}
2739
2740
/// Runs the actual plan, and then prints the physical plan with
2741
/// execution metrics.
2742
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2743
pub struct Analyze {
2744
    /// Should extra detail be included?
2745
    pub verbose: bool,
2746
    /// The logical plan that is being EXPLAIN ANALYZE'd
2747
    pub input: Arc<LogicalPlan>,
2748
    /// The output schema of the explain (2 columns of text)
2749
    pub schema: DFSchemaRef,
2750
}
2751
2752
// Manual implementation needed because of `schema` field. Comparison excludes this field.
2753
impl PartialOrd for Analyze {
2754
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2755
0
        match self.verbose.partial_cmp(&other.verbose) {
2756
0
            Some(Ordering::Equal) => self.input.partial_cmp(&other.input),
2757
0
            cmp => cmp,
2758
        }
2759
0
    }
2760
}
2761
2762
/// Extension operator defined outside of DataFusion
2763
// TODO(clippy): This clippy `allow` should be removed if
2764
// the manual `PartialEq` is removed in favor of a derive.
2765
// (see the `PartialEq` impl for details.)
2766
#[allow(clippy::derived_hash_with_manual_eq)]
2767
#[derive(Debug, Clone, Eq, Hash)]
2768
pub struct Extension {
2769
    /// The runtime extension operator
2770
    pub node: Arc<dyn UserDefinedLogicalNode>,
2771
}
2772
2773
// `PartialEq` cannot be derived for types containing `Arc<dyn Trait>`.
2774
// This manual implementation should be removed if
2775
// https://github.com/rust-lang/rust/issues/39128 is fixed.
2776
impl PartialEq for Extension {
2777
0
    fn eq(&self, other: &Self) -> bool {
2778
0
        self.node.eq(&other.node)
2779
0
    }
2780
}
2781
2782
impl PartialOrd for Extension {
2783
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2784
0
        self.node.partial_cmp(&other.node)
2785
0
    }
2786
}
2787
2788
/// Produces the first `n` tuples from its input and discards the rest.
2789
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
2790
pub struct Limit {
2791
    /// Number of rows to skip before fetch
2792
    pub skip: usize,
2793
    /// Maximum number of rows to fetch,
2794
    /// None means fetching all rows
2795
    pub fetch: Option<usize>,
2796
    /// The logical plan
2797
    pub input: Arc<LogicalPlan>,
2798
}
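A hedged sketch of how `skip` and `fetch` line up with SQL `OFFSET` and `LIMIT`; the input plan is assumed to be built elsewhere:

```rust
use std::sync::Arc;

/// Sketch only: roughly `... OFFSET 3 LIMIT 10` over an existing plan.
fn offset_3_limit_10(input: Arc<LogicalPlan>) -> LogicalPlan {
    LogicalPlan::Limit(Limit {
        skip: 3,         // rows discarded before any are returned
        fetch: Some(10), // at most 10 rows returned; None would mean all remaining rows
        input,
    })
}
```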
2799
2800
/// Removes duplicate rows from the input
2801
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
2802
pub enum Distinct {
2803
    /// Plain `DISTINCT` referencing all selection expressions
2804
    All(Arc<LogicalPlan>),
2805
    /// The `Postgres` addition, allowing separate control over DISTINCT'd and selected columns
2806
    On(DistinctOn),
2807
}
2808
2809
impl Distinct {
2810
    /// Return a reference to the node's input
2811
0
    pub fn input(&self) -> &Arc<LogicalPlan> {
2812
0
        match self {
2813
0
            Distinct::All(input) => input,
2814
0
            Distinct::On(DistinctOn { input, .. }) => input,
2815
        }
2816
0
    }
2817
}
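A hedged sketch of the plain variant: `Distinct::All` simply wraps the input plan, roughly `SELECT DISTINCT ...`:

```rust
use std::sync::Arc;

/// Sketch only: wrap an existing plan in a plain `DISTINCT`.
fn distinct_all(input: Arc<LogicalPlan>) -> LogicalPlan {
    LogicalPlan::Distinct(Distinct::All(input))
}
```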
2818
2819
/// Removes duplicate rows from the input
2820
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2821
pub struct DistinctOn {
2822
    /// The `DISTINCT ON` clause expression list
2823
    pub on_expr: Vec<Expr>,
2824
    /// The selected projection expression list
2825
    pub select_expr: Vec<Expr>,
2826
    /// The `ORDER BY` clause, whose initial expressions must match those of the `ON` clause when
2827
    /// present. Note that those matching expressions actually wrap the `ON` expressions with
2828
    /// additional info pertaining to the sorting procedure (i.e. ASC/DESC, and NULLS FIRST/LAST).
2829
    pub sort_expr: Option<Vec<SortExpr>>,
2830
    /// The logical plan that is being DISTINCT'd
2831
    pub input: Arc<LogicalPlan>,
2832
    /// The schema description of the DISTINCT ON output
2833
    pub schema: DFSchemaRef,
2834
}
2835
2836
impl DistinctOn {
2837
    /// Create a new `DistinctOn` struct.
2838
0
    pub fn try_new(
2839
0
        on_expr: Vec<Expr>,
2840
0
        select_expr: Vec<Expr>,
2841
0
        sort_expr: Option<Vec<SortExpr>>,
2842
0
        input: Arc<LogicalPlan>,
2843
0
    ) -> Result<Self> {
2844
0
        if on_expr.is_empty() {
2845
0
            return plan_err!("No `ON` expressions provided");
2846
0
        }
2847
2848
0
        let on_expr = normalize_cols(on_expr, input.as_ref())?;
2849
0
        let qualified_fields = exprlist_to_fields(select_expr.as_slice(), &input)?
2850
0
            .into_iter()
2851
0
            .collect();
2852
2853
0
        let dfschema = DFSchema::new_with_metadata(
2854
0
            qualified_fields,
2855
0
            input.schema().metadata().clone(),
2856
0
        )?;
2857
2858
0
        let mut distinct_on = DistinctOn {
2859
0
            on_expr,
2860
0
            select_expr,
2861
0
            sort_expr: None,
2862
0
            input,
2863
0
            schema: Arc::new(dfschema),
2864
0
        };
2865
2866
0
        if let Some(sort_expr) = sort_expr {
2867
0
            distinct_on = distinct_on.with_sort_expr(sort_expr)?;
2868
0
        }
2869
2870
0
        Ok(distinct_on)
2871
0
    }
2872
2873
    /// Try to update `self` with new sort expressions.
2874
    ///
2875
    /// Validates that the sort expressions are a super-set of the `ON` expressions.
2876
0
    pub fn with_sort_expr(mut self, sort_expr: Vec<SortExpr>) -> Result<Self> {
2877
0
        let sort_expr = normalize_sorts(sort_expr, self.input.as_ref())?;
2878
2879
        // Check that the left-most sort expressions are the same as the `ON` expressions.
2880
0
        let mut matched = true;
2881
0
        for (on, sort) in self.on_expr.iter().zip(sort_expr.iter()) {
2882
0
            if on != &sort.expr {
2883
0
                matched = false;
2884
0
                break;
2885
0
            }
2886
        }
2887
2888
0
        if self.on_expr.len() > sort_expr.len() || !matched {
2889
0
            return plan_err!(
2890
0
                "SELECT DISTINCT ON expressions must match initial ORDER BY expressions"
2891
0
            );
2892
0
        }
2893
0
2894
0
        self.sort_expr = Some(sort_expr);
2895
0
        Ok(self)
2896
0
    }
2897
}
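A hedged sketch of the prefix rule enforced above: the `ON` expressions must appear first, and in order, in the `ORDER BY` list. It assumes `Expr::sort(asc, nulls_first)` for building `SortExpr`s and an input plan that exposes `state` and `id` columns:

```rust
use std::sync::Arc;
use crate::col;

/// Sketch only: roughly `SELECT DISTINCT ON (state) id FROM t ORDER BY state, id`.
fn distinct_on_sketch(input: Arc<LogicalPlan>) -> Result<DistinctOn> {
    DistinctOn::try_new(
        vec![col("state")], // ON expressions
        vec![col("id")],    // selected expressions
        Some(vec![
            col("state").sort(true, false), // must start with the ON expressions...
            col("id").sort(true, false),    // ...followed by any further ordering
        ]),
        input,
    )
}
```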
2898
2899
// Manual implementation needed because of `schema` field. Comparison excludes this field.
2900
impl PartialOrd for DistinctOn {
2901
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2902
        #[derive(PartialEq, PartialOrd)]
2903
        struct ComparableDistinctOn<'a> {
2904
            /// The `DISTINCT ON` clause expression list
2905
            pub on_expr: &'a Vec<Expr>,
2906
            /// The selected projection expression list
2907
            pub select_expr: &'a Vec<Expr>,
2908
            /// The `ORDER BY` clause, whose initial expressions must match those of the `ON` clause when
2909
            /// present. Note that those matching expressions actually wrap the `ON` expressions with
2910
            /// additional info pertaining to the sorting procedure (i.e. ASC/DESC, and NULLS FIRST/LAST).
2911
            pub sort_expr: &'a Option<Vec<SortExpr>>,
2912
            /// The logical plan that is being DISTINCT'd
2913
            pub input: &'a Arc<LogicalPlan>,
2914
        }
2915
0
        let comparable_self = ComparableDistinctOn {
2916
0
            on_expr: &self.on_expr,
2917
0
            select_expr: &self.select_expr,
2918
0
            sort_expr: &self.sort_expr,
2919
0
            input: &self.input,
2920
0
        };
2921
0
        let comparable_other = ComparableDistinctOn {
2922
0
            on_expr: &other.on_expr,
2923
0
            select_expr: &other.select_expr,
2924
0
            sort_expr: &other.sort_expr,
2925
0
            input: &other.input,
2926
0
        };
2927
0
        comparable_self.partial_cmp(&comparable_other)
2928
0
    }
2929
}
2930
2931
/// Aggregates its input based on a set of grouping and aggregate
2932
/// expressions (e.g. SUM).
2933
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2934
// mark non_exhaustive to encourage use of try_new/new()
2935
#[non_exhaustive]
2936
pub struct Aggregate {
2937
    /// The incoming logical plan
2938
    pub input: Arc<LogicalPlan>,
2939
    /// Grouping expressions
2940
    pub group_expr: Vec<Expr>,
2941
    /// Aggregate expressions
2942
    pub aggr_expr: Vec<Expr>,
2943
    /// The schema description of the aggregate output
2944
    pub schema: DFSchemaRef,
2945
}
2946
2947
impl Aggregate {
2948
    /// Create a new aggregate operator.
2949
0
    pub fn try_new(
2950
0
        input: Arc<LogicalPlan>,
2951
0
        group_expr: Vec<Expr>,
2952
0
        aggr_expr: Vec<Expr>,
2953
0
    ) -> Result<Self> {
2954
0
        let group_expr = enumerate_grouping_sets(group_expr)?;
2955
2956
0
        let is_grouping_set = matches!(group_expr.as_slice(), [Expr::GroupingSet(_)]);
2957
2958
0
        let grouping_expr: Vec<&Expr> = grouping_set_to_exprlist(group_expr.as_slice())?;
2959
2960
0
        let mut qualified_fields = exprlist_to_fields(grouping_expr, &input)?;
2961
2962
        // Even columns that cannot be null will become nullable when used in a grouping set.
2963
0
        if is_grouping_set {
2964
0
            qualified_fields = qualified_fields
2965
0
                .into_iter()
2966
0
                .map(|(q, f)| (q, f.as_ref().clone().with_nullable(true).into()))
2967
0
                .collect::<Vec<_>>();
2968
0
        }
2969
2970
0
        qualified_fields.extend(exprlist_to_fields(aggr_expr.as_slice(), &input)?);
2971
2972
0
        let schema = DFSchema::new_with_metadata(
2973
0
            qualified_fields,
2974
0
            input.schema().metadata().clone(),
2975
0
        )?;
2976
2977
0
        Self::try_new_with_schema(input, group_expr, aggr_expr, Arc::new(schema))
2978
0
    }
2979
2980
    /// Create a new aggregate operator using the provided schema to avoid the overhead of
2981
    /// building the schema again when the schema is already known.
2982
    ///
2983
    /// This method should only be called when you are absolutely sure that the schema being
2984
    /// provided is correct for the aggregate. If in doubt, call [try_new](Self::try_new) instead.
2985
0
    pub fn try_new_with_schema(
2986
0
        input: Arc<LogicalPlan>,
2987
0
        group_expr: Vec<Expr>,
2988
0
        aggr_expr: Vec<Expr>,
2989
0
        schema: DFSchemaRef,
2990
0
    ) -> Result<Self> {
2991
0
        if group_expr.is_empty() && aggr_expr.is_empty() {
2992
0
            return plan_err!(
2993
0
                "Aggregate requires at least one grouping or aggregate expression"
2994
0
            );
2995
0
        }
2996
0
        let group_expr_count = grouping_set_expr_count(&group_expr)?;
2997
0
        if schema.fields().len() != group_expr_count + aggr_expr.len() {
2998
0
            return plan_err!(
2999
0
                "Aggregate schema has wrong number of fields. Expected {} got {}",
3000
0
                group_expr_count + aggr_expr.len(),
3001
0
                schema.fields().len()
3002
0
            );
3003
0
        }
3004
3005
0
        let aggregate_func_dependencies =
3006
0
            calc_func_dependencies_for_aggregate(&group_expr, &input, &schema)?;
3007
0
        let new_schema = schema.as_ref().clone();
3008
0
        let schema = Arc::new(
3009
0
            new_schema.with_functional_dependencies(aggregate_func_dependencies)?,
3010
        );
3011
0
        Ok(Self {
3012
0
            input,
3013
0
            group_expr,
3014
0
            aggr_expr,
3015
0
            schema,
3016
0
        })
3017
0
    }
3018
3019
    /// Get the output expressions.
3020
0
    fn output_expressions(&self) -> Result<Vec<&Expr>> {
3021
0
        let mut exprs = grouping_set_to_exprlist(self.group_expr.as_slice())?;
3022
0
        exprs.extend(self.aggr_expr.iter());
3023
0
        debug_assert!(exprs.len() == self.schema.fields().len());
3024
0
        Ok(exprs)
3025
0
    }
3026
3027
    /// Get the length of the group by expression in the output schema
3028
    /// This is not simply the group by expression length. Expressions may be
3029
    /// GroupingSet, etc. In these cases we need to get inner expression lengths.
3030
0
    pub fn group_expr_len(&self) -> Result<usize> {
3031
0
        grouping_set_expr_count(&self.group_expr)
3032
0
    }
3033
}
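A hedged sketch using the `table_scan`, `employee_schema`, `col`, and `count` helpers that appear in this file's test module; roughly `SELECT state, count(id) FROM employee_csv GROUP BY state`:

```rust
use std::sync::Arc;
use crate::col;
use crate::logical_plan::table_scan;
use crate::test::function_stub::count;

/// Sketch only: one grouping expression plus one aggregate expression.
fn aggregate_sketch() -> Result<Aggregate> {
    let scan = table_scan(Some("employee_csv"), &employee_schema(), Some(vec![0, 3]))?
        .build()?;
    Aggregate::try_new(
        Arc::new(scan),
        vec![col("state")],     // GROUP BY state
        vec![count(col("id"))], // count(id)
    )
}
```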
3034
3035
// Manual implementation needed because of `schema` field. Comparison excludes this field.
3036
impl PartialOrd for Aggregate {
3037
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
3038
0
        match self.input.partial_cmp(&other.input) {
3039
            Some(Ordering::Equal) => {
3040
0
                match self.group_expr.partial_cmp(&other.group_expr) {
3041
0
                    Some(Ordering::Equal) => self.aggr_expr.partial_cmp(&other.aggr_expr),
3042
0
                    cmp => cmp,
3043
                }
3044
            }
3045
0
            cmp => cmp,
3046
        }
3047
0
    }
3048
}
3049
3050
/// Checks whether any expression in `group_expr` contains `Expr::GroupingSet`.
3051
0
fn contains_grouping_set(group_expr: &[Expr]) -> bool {
3052
0
    group_expr
3053
0
        .iter()
3054
0
        .any(|expr| matches!(expr, Expr::GroupingSet(_)))
3055
0
}
3056
3057
/// Calculates functional dependencies for aggregate expressions.
3058
0
fn calc_func_dependencies_for_aggregate(
3059
0
    // Expressions in the GROUP BY clause:
3060
0
    group_expr: &[Expr],
3061
0
    // Input plan of the aggregate:
3062
0
    input: &LogicalPlan,
3063
0
    // Aggregate schema
3064
0
    aggr_schema: &DFSchema,
3065
0
) -> Result<FunctionalDependencies> {
3066
0
    // We can do a case analysis on how to propagate functional dependencies based on
3067
0
    // whether the GROUP BY in question contains a grouping set expression:
3068
0
    // - If so, the functional dependencies will be empty because we cannot guarantee
3069
0
    //   that GROUP BY expression results will be unique.
3070
0
    // - Otherwise, it may be possible to propagate functional dependencies.
3071
0
    if !contains_grouping_set(group_expr) {
3072
0
        let group_by_expr_names = group_expr
3073
0
            .iter()
3074
0
            .map(|item| item.schema_name().to_string())
3075
0
            .collect::<IndexSet<_>>()
3076
0
            .into_iter()
3077
0
            .collect::<Vec<_>>();
3078
0
        let aggregate_func_dependencies = aggregate_functional_dependencies(
3079
0
            input.schema(),
3080
0
            &group_by_expr_names,
3081
0
            aggr_schema,
3082
0
        );
3083
0
        Ok(aggregate_func_dependencies)
3084
    } else {
3085
0
        Ok(FunctionalDependencies::empty())
3086
    }
3087
0
}
3088
3089
/// This function projects functional dependencies of the `input` plan according
3090
/// to projection expressions `exprs`.
3091
0
fn calc_func_dependencies_for_project(
3092
0
    exprs: &[Expr],
3093
0
    input: &LogicalPlan,
3094
0
) -> Result<FunctionalDependencies> {
3095
0
    let input_fields = input.schema().field_names();
3096
    // Calculate expression indices (if present) in the input schema.
3097
0
    let proj_indices = exprs
3098
0
        .iter()
3099
0
        .map(|expr| match expr {
3100
0
            Expr::Wildcard { qualifier, options } => {
3101
0
                let wildcard_fields = exprlist_to_fields(
3102
0
                    vec![&Expr::Wildcard {
3103
0
                        qualifier: qualifier.clone(),
3104
0
                        options: options.clone(),
3105
0
                    }],
3106
0
                    input,
3107
0
                )?;
3108
0
                Ok::<_, DataFusionError>(
3109
0
                    wildcard_fields
3110
0
                        .into_iter()
3111
0
                        .filter_map(|(qualifier, f)| {
3112
0
                            let flat_name = qualifier
3113
0
                                .map(|t| format!("{}.{}", t, f.name()))
3114
0
                                .unwrap_or_else(|| f.name().clone());
3115
0
                            input_fields.iter().position(|item| *item == flat_name)
3116
0
                        })
3117
0
                        .collect::<Vec<_>>(),
3118
0
                )
3119
            }
3120
0
            Expr::Alias(alias) => {
3121
0
                let name = format!("{}", alias.expr);
3122
0
                Ok(input_fields
3123
0
                    .iter()
3124
0
                    .position(|item| *item == name)
3125
0
                    .map(|i| vec![i])
3126
0
                    .unwrap_or(vec![]))
3127
            }
3128
            _ => {
3129
0
                let name = format!("{}", expr);
3130
0
                Ok(input_fields
3131
0
                    .iter()
3132
0
                    .position(|item| *item == name)
3133
0
                    .map(|i| vec![i])
3134
0
                    .unwrap_or(vec![]))
3135
            }
3136
0
        })
3137
0
        .collect::<Result<Vec<_>>>()?
3138
0
        .into_iter()
3139
0
        .flatten()
3140
0
        .collect::<Vec<_>>();
3141
3142
0
    let len = exprlist_len(exprs, input.schema(), Some(find_base_plan(input).schema()))?;
3143
0
    Ok(input
3144
0
        .schema()
3145
0
        .functional_dependencies()
3146
0
        .project_functional_dependencies(&proj_indices, len))
3147
0
}
3148
3149
/// Sorts its input according to a list of sort expressions.
3150
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
3151
pub struct Sort {
3152
    /// The sort expressions
3153
    pub expr: Vec<SortExpr>,
3154
    /// The incoming logical plan
3155
    pub input: Arc<LogicalPlan>,
3156
    /// Optional fetch limit
3157
    pub fetch: Option<usize>,
3158
}
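A hedged sketch of a `Sort` node, again assuming `Expr::sort(asc, nulls_first)` for building the sort keys; roughly `ORDER BY state ASC, id DESC LIMIT 5`:

```rust
use std::sync::Arc;
use crate::col;

/// Sketch only: sort on two keys with an optional top-k fetch.
fn sort_sketch(input: Arc<LogicalPlan>) -> LogicalPlan {
    LogicalPlan::Sort(Sort {
        expr: vec![
            col("state").sort(true, false), // state ASC NULLS LAST
            col("id").sort(false, true),    // id DESC NULLS FIRST
        ],
        input,
        fetch: Some(5), // optional limit that can be pushed into the sort (top-k)
    })
}
```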
3159
3160
/// Join two logical plans on one or more join columns
3161
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3162
pub struct Join {
3163
    /// Left input
3164
    pub left: Arc<LogicalPlan>,
3165
    /// Right input
3166
    pub right: Arc<LogicalPlan>,
3167
    /// Equijoin clause expressed as pairs of (left, right) join expressions
3168
    pub on: Vec<(Expr, Expr)>,
3169
    /// Filters applied during join (non-equi conditions)
3170
    pub filter: Option<Expr>,
3171
    /// Join type
3172
    pub join_type: JoinType,
3173
    /// Join constraint
3174
    pub join_constraint: JoinConstraint,
3175
    /// The output schema, containing fields from the left and right inputs
3176
    pub schema: DFSchemaRef,
3177
    /// If null_equals_null is true, null == null else null != null
3178
    pub null_equals_null: bool,
3179
}
3180
3181
impl Join {
3182
    /// Create a Join whose inputs are wrapped with projections; this method is used to help create the physical join.
3183
0
    pub fn try_new_with_project_input(
3184
0
        original: &LogicalPlan,
3185
0
        left: Arc<LogicalPlan>,
3186
0
        right: Arc<LogicalPlan>,
3187
0
        column_on: (Vec<Column>, Vec<Column>),
3188
0
    ) -> Result<Self> {
3189
0
        let original_join = match original {
3190
0
            LogicalPlan::Join(join) => join,
3191
0
            _ => return plan_err!("Could not create join with project input"),
3192
        };
3193
3194
0
        let on: Vec<(Expr, Expr)> = column_on
3195
0
            .0
3196
0
            .into_iter()
3197
0
            .zip(column_on.1)
3198
0
            .map(|(l, r)| (Expr::Column(l), Expr::Column(r)))
3199
0
            .collect();
3200
0
        let join_schema =
3201
0
            build_join_schema(left.schema(), right.schema(), &original_join.join_type)?;
3202
3203
0
        Ok(Join {
3204
0
            left,
3205
0
            right,
3206
0
            on,
3207
0
            filter: original_join.filter.clone(),
3208
0
            join_type: original_join.join_type,
3209
0
            join_constraint: original_join.join_constraint,
3210
0
            schema: Arc::new(join_schema),
3211
0
            null_equals_null: original_join.null_equals_null,
3212
0
        })
3213
0
    }
3214
}
3215
3216
// Manual implementation needed because of `schema` field. Comparison excludes this field.
3217
impl PartialOrd for Join {
3218
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
3219
        #[derive(PartialEq, PartialOrd)]
3220
        struct ComparableJoin<'a> {
3221
            /// Left input
3222
            pub left: &'a Arc<LogicalPlan>,
3223
            /// Right input
3224
            pub right: &'a Arc<LogicalPlan>,
3225
            /// Equijoin clause expressed as pairs of (left, right) join expressions
3226
            pub on: &'a Vec<(Expr, Expr)>,
3227
            /// Filters applied during join (non-equi conditions)
3228
            pub filter: &'a Option<Expr>,
3229
            /// Join type
3230
            pub join_type: &'a JoinType,
3231
            /// Join constraint
3232
            pub join_constraint: &'a JoinConstraint,
3233
            /// If null_equals_null is true, null == null else null != null
3234
            pub null_equals_null: &'a bool,
3235
        }
3236
0
        let comparable_self = ComparableJoin {
3237
0
            left: &self.left,
3238
0
            right: &self.right,
3239
0
            on: &self.on,
3240
0
            filter: &self.filter,
3241
0
            join_type: &self.join_type,
3242
0
            join_constraint: &self.join_constraint,
3243
0
            null_equals_null: &self.null_equals_null,
3244
0
        };
3245
0
        let comparable_other = ComparableJoin {
3246
0
            left: &other.left,
3247
0
            right: &other.right,
3248
0
            on: &other.on,
3249
0
            filter: &other.filter,
3250
0
            join_type: &other.join_type,
3251
0
            join_constraint: &other.join_constraint,
3252
0
            null_equals_null: &other.null_equals_null,
3253
0
        };
3254
0
        comparable_self.partial_cmp(&comparable_other)
3255
0
    }
3256
}
3257
3258
/// Subquery
3259
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash)]
3260
pub struct Subquery {
3261
    /// The subquery
3262
    pub subquery: Arc<LogicalPlan>,
3263
    /// The outer references used in the subquery
3264
    pub outer_ref_columns: Vec<Expr>,
3265
}
3266
3267
impl Subquery {
3268
0
    pub fn try_from_expr(plan: &Expr) -> Result<&Subquery> {
3269
0
        match plan {
3270
0
            Expr::ScalarSubquery(it) => Ok(it),
3271
0
            Expr::Cast(cast) => Subquery::try_from_expr(cast.expr.as_ref()),
3272
0
            _ => plan_err!("Could not coerce into ScalarSubquery!"),
3273
        }
3274
0
    }
3275
3276
0
    pub fn with_plan(&self, plan: Arc<LogicalPlan>) -> Subquery {
3277
0
        Subquery {
3278
0
            subquery: plan,
3279
0
            outer_ref_columns: self.outer_ref_columns.clone(),
3280
0
        }
3281
0
    }
3282
}
3283
3284
impl Debug for Subquery {
3285
0
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
3286
0
        write!(f, "<subquery>")
3287
0
    }
3288
}
3289
3290
/// Logical partitioning schemes supported by [`LogicalPlan::Repartition`]
3291
///
3292
/// See [`Partitioning`] for more details on partitioning
3293
///
3294
/// [`Partitioning`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/enum.Partitioning.html#
3295
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
3296
pub enum Partitioning {
3297
    /// Allocate batches using a round-robin algorithm and the specified number of partitions
3298
    RoundRobinBatch(usize),
3299
    /// Allocate rows based on a hash of one or more expressions and the specified number
3300
    /// of partitions.
3301
    Hash(Vec<Expr>, usize),
3302
    /// The DISTRIBUTE BY clause is used to repartition the data based on the input expressions
3303
    DistributeBy(Vec<Expr>),
3304
}
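A hedged sketch of the three variants; the column name and partition count are invented for illustration:

```rust
use crate::col;

/// Sketch only: one value of each logical partitioning scheme.
fn partitioning_sketch() -> Vec<Partitioning> {
    vec![
        Partitioning::RoundRobinBatch(8),            // deal batches across 8 partitions
        Partitioning::Hash(vec![col("id")], 8),      // route rows by hash(id) into 8 partitions
        Partitioning::DistributeBy(vec![col("id")]), // SQL `DISTRIBUTE BY id`
    ]
}
```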
3305
3306
/// Represents the unnesting operation on a column based on the context (a known struct
3307
/// column, a list column, or let the planner infer the unnesting type).
3308
///
3309
/// The inferred unnesting type works for both struct and list columns, but the unnesting
3310
/// will only be done once (depth = 1). In case recursion is needed on a multi-dimensional
3311
/// list type, use [`ColumnUnnestList`]
3312
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
3313
pub enum ColumnUnnestType {
3314
// Unnesting a list column: a vector of ColumnUnnestList is used because
3315
// a column can be unnested at different levels, resulting in different output columns
3316
    List(Vec<ColumnUnnestList>),
3317
// For structs, only one unnest can be performed on a column at a time
3318
    Struct,
3319
    // Infer the unnest type based on column schema
3320
// If the column is a list column, the unnest depth will be 1
3321
// This value supports the sugar syntax of the old DataFrame API (unnest(either_list_or_struct_column))
3322
    Inferred,
3323
}
3324
3325
impl fmt::Display for ColumnUnnestType {
3326
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3327
0
        match self {
3328
0
            ColumnUnnestType::List(lists) => {
3329
0
                let list_strs: Vec<String> =
3330
0
                    lists.iter().map(|list| list.to_string()).collect();
3331
0
                write!(f, "List([{}])", list_strs.join(", "))
3332
            }
3333
0
            ColumnUnnestType::Struct => write!(f, "Struct"),
3334
0
            ColumnUnnestType::Inferred => write!(f, "Inferred"),
3335
        }
3336
0
    }
3337
}
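A hedged sketch of the three ways a column unnest can be specified; `Column::new_unqualified`, the column name, and the depth are assumptions for illustration:

```rust
use datafusion_common::Column;

/// Sketch only: struct unnest, planner-inferred unnest, and a depth-2 list unnest.
fn unnest_type_sketch() -> Vec<ColumnUnnestType> {
    vec![
        ColumnUnnestType::Struct,   // unnest a struct column
        ColumnUnnestType::Inferred, // let the planner infer list vs. struct (lists unnest at depth 1)
        ColumnUnnestType::List(vec![ColumnUnnestList {
            output_column: Column::new_unqualified("c_depth_2"),
            depth: 2, // recurse two list levels for this output column
        }]),
    ]
}
```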
3338
3339
/// Represent the unnesting operation on a list column, such as the recursion depth and
3340
/// the output column name after unnesting
3341
///
3342
/// Example: given `ColumnUnnestList { output_column: "output_name", depth: 2 }`
3343
///
3344
/// ```text
3345
///   input             output_name
3346
///  ┌─────────┐      ┌─────────┐
3347
///  │{{1,2}}  │      │ 1       │
3348
///  ├─────────┼─────►├─────────┤           
3349
///  │{{3}}    │      │ 2       │           
3350
///  ├─────────┤      ├─────────┤           
3351
///  │{{4},{5}}│      │ 3       │           
3352
///  └─────────┘      ├─────────┤           
3353
///                   │ 4       │           
3354
///                   ├─────────┤           
3355
///                   │ 5       │           
3356
///                   └─────────┘           
3357
/// ```
3358
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
3359
pub struct ColumnUnnestList {
3360
    pub output_column: Column,
3361
    pub depth: usize,
3362
}
3363
3364
impl fmt::Display for ColumnUnnestList {
3365
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3366
0
        write!(f, "{}|depth={}", self.output_column, self.depth)
3367
0
    }
3368
}
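A hedged sketch tying the `Display` impl above back to the doc example; `Column::new_unqualified` is an assumption:

```rust
use datafusion_common::Column;

/// Sketch only: the doc example `{ output_column: "output_name", depth: 2 }`
/// renders as `output_name|depth=2`.
fn column_unnest_list_display_sketch() {
    let unnest = ColumnUnnestList {
        output_column: Column::new_unqualified("output_name"),
        depth: 2,
    };
    assert_eq!(unnest.to_string(), "output_name|depth=2");
}
```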
3369
3370
/// Unnest a column that contains a nested list type. See
3371
/// [`UnnestOptions`] for more details.
3372
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3373
pub struct Unnest {
3374
    /// The incoming logical plan
3375
    pub input: Arc<LogicalPlan>,
3376
    /// Columns to run unnest on, can be a list of (List/Struct) columns
3377
    pub exec_columns: Vec<(Column, ColumnUnnestType)>,
3378
    /// refer to the indices (in the input schema) of columns
3379
    /// that have type list to run unnest on
3380
    pub list_type_columns: Vec<(usize, ColumnUnnestList)>,
3381
    /// refer to the indices (in the input schema) of columns
3382
    /// that have type struct to run unnest on
3383
    pub struct_type_columns: Vec<usize>,
3384
    /// Items aligned with the output columns,
3385
    /// representing which column in the input schema each output column depends on
3386
    pub dependency_indices: Vec<usize>,
3387
    /// The output schema, containing the unnested field column.
3388
    pub schema: DFSchemaRef,
3389
    /// Options
3390
    pub options: UnnestOptions,
3391
}
3392
3393
// Manual implementation needed because of `schema` field. Comparison excludes this field.
3394
impl PartialOrd for Unnest {
3395
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
3396
        #[derive(PartialEq, PartialOrd)]
3397
        struct ComparableUnnest<'a> {
3398
            /// The incoming logical plan
3399
            pub input: &'a Arc<LogicalPlan>,
3400
            /// Columns to run unnest on, can be a list of (List/Struct) columns
3401
            pub exec_columns: &'a Vec<(Column, ColumnUnnestType)>,
3402
            /// refer to the indices (in the input schema) of columns
3403
            /// that have type list to run unnest on
3404
            pub list_type_columns: &'a Vec<(usize, ColumnUnnestList)>,
3405
            /// refer to the indices (in the input schema) of columns
3406
            /// that have type struct to run unnest on
3407
            pub struct_type_columns: &'a Vec<usize>,
3408
            /// Items aligned with the output columns,
3409
            /// representing which column in the input schema each output column depends on
3410
            pub dependency_indices: &'a Vec<usize>,
3411
            /// Options
3412
            pub options: &'a UnnestOptions,
3413
        }
3414
0
        let comparable_self = ComparableUnnest {
3415
0
            input: &self.input,
3416
0
            exec_columns: &self.exec_columns,
3417
0
            list_type_columns: &self.list_type_columns,
3418
0
            struct_type_columns: &self.struct_type_columns,
3419
0
            dependency_indices: &self.dependency_indices,
3420
0
            options: &self.options,
3421
0
        };
3422
0
        let comparable_other = ComparableUnnest {
3423
0
            input: &other.input,
3424
0
            exec_columns: &other.exec_columns,
3425
0
            list_type_columns: &other.list_type_columns,
3426
0
            struct_type_columns: &other.struct_type_columns,
3427
0
            dependency_indices: &other.dependency_indices,
3428
0
            options: &other.options,
3429
0
        };
3430
0
        comparable_self.partial_cmp(&comparable_other)
3431
0
    }
3432
}
3433
3434
#[cfg(test)]
3435
mod tests {
3436
3437
    use super::*;
3438
    use crate::builder::LogicalTableSource;
3439
    use crate::logical_plan::table_scan;
3440
    use crate::{col, exists, in_subquery, lit, placeholder, GroupingSet};
3441
3442
    use datafusion_common::tree_node::{TransformedResult, TreeNodeVisitor};
3443
    use datafusion_common::{not_impl_err, Constraint, ScalarValue};
3444
3445
    use crate::test::function_stub::count;
3446
3447
    fn employee_schema() -> Schema {
3448
        Schema::new(vec![
3449
            Field::new("id", DataType::Int32, false),
3450
            Field::new("first_name", DataType::Utf8, false),
3451
            Field::new("last_name", DataType::Utf8, false),
3452
            Field::new("state", DataType::Utf8, false),
3453
            Field::new("salary", DataType::Int32, false),
3454
        ])
3455
    }
3456
3457
    fn display_plan() -> Result<LogicalPlan> {
3458
        let plan1 = table_scan(Some("employee_csv"), &employee_schema(), Some(vec![3]))?
3459
            .build()?;
3460
3461
        table_scan(Some("employee_csv"), &employee_schema(), Some(vec![0, 3]))?
3462
            .filter(in_subquery(col("state"), Arc::new(plan1)))?
3463
            .project(vec![col("id")])?
3464
            .build()
3465
    }
3466
3467
    #[test]
3468
    fn test_display_indent() -> Result<()> {
3469
        let plan = display_plan()?;
3470
3471
        let expected = "Projection: employee_csv.id\
3472
        \n  Filter: employee_csv.state IN (<subquery>)\
3473
        \n    Subquery:\
3474
        \n      TableScan: employee_csv projection=[state]\
3475
        \n    TableScan: employee_csv projection=[id, state]";
3476
3477
        assert_eq!(expected, format!("{}", plan.display_indent()));
3478
        Ok(())
3479
    }
3480
3481
    #[test]
3482
    fn test_display_indent_schema() -> Result<()> {
3483
        let plan = display_plan()?;
3484
3485
        let expected = "Projection: employee_csv.id [id:Int32]\
3486
        \n  Filter: employee_csv.state IN (<subquery>) [id:Int32, state:Utf8]\
3487
        \n    Subquery: [state:Utf8]\
3488
        \n      TableScan: employee_csv projection=[state] [state:Utf8]\
3489
        \n    TableScan: employee_csv projection=[id, state] [id:Int32, state:Utf8]";
3490
3491
        assert_eq!(expected, format!("{}", plan.display_indent_schema()));
3492
        Ok(())
3493
    }
3494
3495
    #[test]
3496
    fn test_display_subquery_alias() -> Result<()> {
3497
        let plan1 = table_scan(Some("employee_csv"), &employee_schema(), Some(vec![3]))?
3498
            .build()?;
3499
        let plan1 = Arc::new(plan1);
3500
3501
        let plan =
3502
            table_scan(Some("employee_csv"), &employee_schema(), Some(vec![0, 3]))?
3503
                .project(vec![col("id"), exists(plan1).alias("exists")])?
3504
                .build();
3505
3506
        let expected = "Projection: employee_csv.id, EXISTS (<subquery>) AS exists\
3507
        \n  Subquery:\
3508
        \n    TableScan: employee_csv projection=[state]\
3509
        \n  TableScan: employee_csv projection=[id, state]";
3510
3511
        assert_eq!(expected, format!("{}", plan?.display_indent()));
3512
        Ok(())
3513
    }
3514
3515
    #[test]
3516
    fn test_display_graphviz() -> Result<()> {
3517
        let plan = display_plan()?;
3518
3519
        let expected_graphviz = r#"
3520
// Begin DataFusion GraphViz Plan,
3521
// display it online here: https://dreampuf.github.io/GraphvizOnline
3522
3523
digraph {
3524
  subgraph cluster_1
3525
  {
3526
    graph[label="LogicalPlan"]
3527
    2[shape=box label="Projection: employee_csv.id"]
3528
    3[shape=box label="Filter: employee_csv.state IN (<subquery>)"]
3529
    2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]
3530
    4[shape=box label="Subquery:"]
3531
    3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]
3532
    5[shape=box label="TableScan: employee_csv projection=[state]"]
3533
    4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]
3534
    6[shape=box label="TableScan: employee_csv projection=[id, state]"]
3535
    3 -> 6 [arrowhead=none, arrowtail=normal, dir=back]
3536
  }
3537
  subgraph cluster_7
3538
  {
3539
    graph[label="Detailed LogicalPlan"]
3540
    8[shape=box label="Projection: employee_csv.id\nSchema: [id:Int32]"]
3541
    9[shape=box label="Filter: employee_csv.state IN (<subquery>)\nSchema: [id:Int32, state:Utf8]"]
3542
    8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]
3543
    10[shape=box label="Subquery:\nSchema: [state:Utf8]"]
3544
    9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]
3545
    11[shape=box label="TableScan: employee_csv projection=[state]\nSchema: [state:Utf8]"]
3546
    10 -> 11 [arrowhead=none, arrowtail=normal, dir=back]
3547
    12[shape=box label="TableScan: employee_csv projection=[id, state]\nSchema: [id:Int32, state:Utf8]"]
3548
    9 -> 12 [arrowhead=none, arrowtail=normal, dir=back]
3549
  }
3550
}
3551
// End DataFusion GraphViz Plan
3552
"#;
3553
3554
        // just test for a few key lines in the output rather than the
3555
        // whole thing to make test maintenance easier.
3556
        let graphviz = format!("{}", plan.display_graphviz());
3557
3558
        assert_eq!(expected_graphviz, graphviz);
3559
        Ok(())
3560
    }
3561
3562
    #[test]
3563
    fn test_display_pg_json() -> Result<()> {
3564
        let plan = display_plan()?;
3565
3566
        let expected_pg_json = r#"[
3567
  {
3568
    "Plan": {
3569
      "Expressions": [
3570
        "employee_csv.id"
3571
      ],
3572
      "Node Type": "Projection",
3573
      "Output": [
3574
        "id"
3575
      ],
3576
      "Plans": [
3577
        {
3578
          "Condition": "employee_csv.state IN (<subquery>)",
3579
          "Node Type": "Filter",
3580
          "Output": [
3581
            "id",
3582
            "state"
3583
          ],
3584
          "Plans": [
3585
            {
3586
              "Node Type": "Subquery",
3587
              "Output": [
3588
                "state"
3589
              ],
3590
              "Plans": [
3591
                {
3592
                  "Node Type": "TableScan",
3593
                  "Output": [
3594
                    "state"
3595
                  ],
3596
                  "Plans": [],
3597
                  "Relation Name": "employee_csv"
3598
                }
3599
              ]
3600
            },
3601
            {
3602
              "Node Type": "TableScan",
3603
              "Output": [
3604
                "id",
3605
                "state"
3606
              ],
3607
              "Plans": [],
3608
              "Relation Name": "employee_csv"
3609
            }
3610
          ]
3611
        }
3612
      ]
3613
    }
3614
  }
3615
]"#;
3616
3617
        let pg_json = format!("{}", plan.display_pg_json());
3618
3619
        assert_eq!(expected_pg_json, pg_json);
3620
        Ok(())
3621
    }
3622
3623
    /// Tests for the Visitor trait and walking logical plan nodes
3624
    #[derive(Debug, Default)]
3625
    struct OkVisitor {
3626
        strings: Vec<String>,
3627
    }
3628
3629
    impl<'n> TreeNodeVisitor<'n> for OkVisitor {
3630
        type Node = LogicalPlan;
3631
3632
        fn f_down(&mut self, plan: &'n LogicalPlan) -> Result<TreeNodeRecursion> {
3633
            let s = match plan {
3634
                LogicalPlan::Projection { .. } => "pre_visit Projection",
3635
                LogicalPlan::Filter { .. } => "pre_visit Filter",
3636
                LogicalPlan::TableScan { .. } => "pre_visit TableScan",
3637
                _ => {
3638
                    return not_impl_err!("unknown plan type");
3639
                }
3640
            };
3641
3642
            self.strings.push(s.into());
3643
            Ok(TreeNodeRecursion::Continue)
3644
        }
3645
3646
        fn f_up(&mut self, plan: &'n LogicalPlan) -> Result<TreeNodeRecursion> {
3647
            let s = match plan {
3648
                LogicalPlan::Projection { .. } => "post_visit Projection",
3649
                LogicalPlan::Filter { .. } => "post_visit Filter",
3650
                LogicalPlan::TableScan { .. } => "post_visit TableScan",
3651
                _ => {
3652
                    return not_impl_err!("unknown plan type");
3653
                }
3654
            };
3655
3656
            self.strings.push(s.into());
3657
            Ok(TreeNodeRecursion::Continue)
3658
        }
3659
    }
3660
3661
    #[test]
3662
    fn visit_order() {
3663
        let mut visitor = OkVisitor::default();
3664
        let plan = test_plan();
3665
        let res = plan.visit_with_subqueries(&mut visitor);
3666
        assert!(res.is_ok());
3667
3668
        assert_eq!(
3669
            visitor.strings,
3670
            vec![
3671
                "pre_visit Projection",
3672
                "pre_visit Filter",
3673
                "pre_visit TableScan",
3674
                "post_visit TableScan",
3675
                "post_visit Filter",
3676
                "post_visit Projection",
3677
            ]
3678
        );
3679
    }
3680
3681
    #[derive(Debug, Default)]
3682
    /// Counter that counts to zero and returns true when it gets there
3683
    struct OptionalCounter {
3684
        val: Option<usize>,
3685
    }
3686
3687
    impl OptionalCounter {
3688
        fn new(val: usize) -> Self {
3689
            Self { val: Some(val) }
3690
        }
3691
        // Decrements the counter by 1, if any, returning true if it hits zero
3692
        fn dec(&mut self) -> bool {
3693
            if Some(0) == self.val {
3694
                true
3695
            } else {
3696
                self.val = self.val.take().map(|i| i - 1);
3697
                false
3698
            }
3699
        }
3700
    }
3701
3702
    #[derive(Debug, Default)]
3703
    /// Visitor that returns false after some number of visits
3704
    struct StoppingVisitor {
3705
        inner: OkVisitor,
3706
        /// When Some(0) returns false from pre_visit
3707
        return_false_from_pre_in: OptionalCounter,
3708
        /// When Some(0) returns false from post_visit
3709
        return_false_from_post_in: OptionalCounter,
3710
    }
3711
3712
    impl<'n> TreeNodeVisitor<'n> for StoppingVisitor {
3713
        type Node = LogicalPlan;
3714
3715
        fn f_down(&mut self, plan: &'n LogicalPlan) -> Result<TreeNodeRecursion> {
3716
            if self.return_false_from_pre_in.dec() {
3717
                return Ok(TreeNodeRecursion::Stop);
3718
            }
3719
            self.inner.f_down(plan)?;
3720
3721
            Ok(TreeNodeRecursion::Continue)
3722
        }
3723
3724
        fn f_up(&mut self, plan: &'n LogicalPlan) -> Result<TreeNodeRecursion> {
3725
            if self.return_false_from_post_in.dec() {
3726
                return Ok(TreeNodeRecursion::Stop);
3727
            }
3728
3729
            self.inner.f_up(plan)
3730
        }
3731
    }
3732
3733
    /// test early stopping in pre-visit
3734
    #[test]
3735
    fn early_stopping_pre_visit() {
3736
        let mut visitor = StoppingVisitor {
3737
            return_false_from_pre_in: OptionalCounter::new(2),
3738
            ..Default::default()
3739
        };
3740
        let plan = test_plan();
3741
        let res = plan.visit_with_subqueries(&mut visitor);
3742
        assert!(res.is_ok());
3743
3744
        assert_eq!(
3745
            visitor.inner.strings,
3746
            vec!["pre_visit Projection", "pre_visit Filter"]
3747
        );
3748
    }
3749
3750
    #[test]
3751
    fn early_stopping_post_visit() {
3752
        let mut visitor = StoppingVisitor {
3753
            return_false_from_post_in: OptionalCounter::new(1),
3754
            ..Default::default()
3755
        };
3756
        let plan = test_plan();
3757
        let res = plan.visit_with_subqueries(&mut visitor);
3758
        assert!(res.is_ok());
3759
3760
        assert_eq!(
3761
            visitor.inner.strings,
3762
            vec![
3763
                "pre_visit Projection",
3764
                "pre_visit Filter",
3765
                "pre_visit TableScan",
3766
                "post_visit TableScan",
3767
            ]
3768
        );
3769
    }
3770
3771
    #[derive(Debug, Default)]
3772
    /// Visitor that returns an error after some number of visits
3773
    struct ErrorVisitor {
3774
        inner: OkVisitor,
3775
        /// When Some(0) returns false from pre_visit
3776
        return_error_from_pre_in: OptionalCounter,
3777
        /// When Some(0) returns false from post_visit
3778
        return_error_from_post_in: OptionalCounter,
3779
    }
3780
3781
    impl<'n> TreeNodeVisitor<'n> for ErrorVisitor {
3782
        type Node = LogicalPlan;
3783
3784
        fn f_down(&mut self, plan: &'n LogicalPlan) -> Result<TreeNodeRecursion> {
3785
            if self.return_error_from_pre_in.dec() {
3786
                return not_impl_err!("Error in pre_visit");
3787
            }
3788
3789
            self.inner.f_down(plan)
3790
        }
3791
3792
        fn f_up(&mut self, plan: &'n LogicalPlan) -> Result<TreeNodeRecursion> {
3793
            if self.return_error_from_post_in.dec() {
3794
                return not_impl_err!("Error in post_visit");
3795
            }
3796
3797
            self.inner.f_up(plan)
3798
        }
3799
    }
3800
3801
    #[test]
3802
    fn error_pre_visit() {
3803
        let mut visitor = ErrorVisitor {
3804
            return_error_from_pre_in: OptionalCounter::new(2),
3805
            ..Default::default()
3806
        };
3807
        let plan = test_plan();
3808
        let res = plan.visit_with_subqueries(&mut visitor).unwrap_err();
3809
        assert_eq!(
3810
            "This feature is not implemented: Error in pre_visit",
3811
            res.strip_backtrace()
3812
        );
3813
        assert_eq!(
3814
            visitor.inner.strings,
3815
            vec!["pre_visit Projection", "pre_visit Filter"]
3816
        );
3817
    }
3818
3819
    #[test]
3820
    fn error_post_visit() {
3821
        let mut visitor = ErrorVisitor {
3822
            return_error_from_post_in: OptionalCounter::new(1),
3823
            ..Default::default()
3824
        };
3825
        let plan = test_plan();
3826
        let res = plan.visit_with_subqueries(&mut visitor).unwrap_err();
3827
        assert_eq!(
3828
            "This feature is not implemented: Error in post_visit",
3829
            res.strip_backtrace()
3830
        );
3831
        assert_eq!(
3832
            visitor.inner.strings,
3833
            vec![
3834
                "pre_visit Projection",
3835
                "pre_visit Filter",
3836
                "pre_visit TableScan",
3837
                "post_visit TableScan",
3838
            ]
3839
        );
3840
    }
3841
3842
    #[test]
3843
    fn projection_expr_schema_mismatch() -> Result<()> {
3844
        let empty_schema = Arc::new(DFSchema::empty());
3845
        let p = Projection::try_new_with_schema(
3846
            vec![col("a")],
3847
            Arc::new(LogicalPlan::EmptyRelation(EmptyRelation {
3848
                produce_one_row: false,
3849
                schema: Arc::clone(&empty_schema),
3850
            })),
3851
            empty_schema,
3852
        );
3853
        assert_eq!(p.err().unwrap().strip_backtrace(), "Error during planning: Projection has mismatch between number of expressions (1) and number of fields in schema (0)");
3854
        Ok(())
3855
    }
3856
3857
    fn test_plan() -> LogicalPlan {
3858
        let schema = Schema::new(vec![
3859
            Field::new("id", DataType::Int32, false),
3860
            Field::new("state", DataType::Utf8, false),
3861
        ]);
3862
3863
        table_scan(TableReference::none(), &schema, Some(vec![0, 1]))
3864
            .unwrap()
3865
            .filter(col("state").eq(lit("CO")))
3866
            .unwrap()
3867
            .project(vec![col("id")])
3868
            .unwrap()
3869
            .build()
3870
            .unwrap()
3871
    }
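
    // Illustrative sketch, not part of the original suite: the helper plan
    // built by `test_plan()` can be inspected with the display helpers on
    // `LogicalPlan`; the output is printed rather than asserted here.
    #[test]
    fn display_test_plan_sketch() {
        let plan = test_plan();
        // One-line rendering of the root operator only.
        println!("{}", plan.display());
        // Indented rendering of the whole tree (Projection -> Filter -> TableScan).
        println!("{}", plan.display_indent());
    }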
3872
3873
    #[test]
3874
    fn test_replace_invalid_placeholder() {
3875
        // test empty placeholder
3876
        let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
3877
3878
        let plan = table_scan(TableReference::none(), &schema, None)
3879
            .unwrap()
3880
            .filter(col("id").eq(placeholder("")))
3881
            .unwrap()
3882
            .build()
3883
            .unwrap();
3884
3885
        let param_values = vec![ScalarValue::Int32(Some(42))];
3886
        plan.replace_params_with_values(&param_values.clone().into())
3887
            .expect_err("unexpectedly succeeded to replace an invalid placeholder");
3888
3889
        // test $0 placeholder
3890
        let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
3891
3892
        let plan = table_scan(TableReference::none(), &schema, None)
3893
            .unwrap()
3894
            .filter(col("id").eq(placeholder("$0")))
3895
            .unwrap()
3896
            .build()
3897
            .unwrap();
3898
3899
        plan.replace_params_with_values(&param_values.clone().into())
3900
            .expect_err("unexpectedly succeeded to replace an invalid placeholder");
3901
3902
        // test $00 placeholder
3903
        let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
3904
3905
        let plan = table_scan(TableReference::none(), &schema, None)
3906
            .unwrap()
3907
            .filter(col("id").eq(placeholder("$00")))
3908
            .unwrap()
3909
            .build()
3910
            .unwrap();
3911
3912
        plan.replace_params_with_values(&param_values.into())
3913
            .expect_err("unexpectedly succeeded to replace an invalid placeholder");
3914
    }
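
    // Illustrative sketch, not part of the original suite: in contrast to the
    // invalid forms above, a well-formed `$1` placeholder is expected to be
    // substituted successfully (parameter indices are 1-based).
    #[test]
    fn test_replace_valid_placeholder_sketch() {
        let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);

        let plan = table_scan(TableReference::none(), &schema, None)
            .unwrap()
            .filter(col("id").eq(placeholder("$1")))
            .unwrap()
            .build()
            .unwrap();

        // `$1` refers to the first (and only) parameter value.
        let param_values = vec![ScalarValue::Int32(Some(42))];
        plan.replace_params_with_values(&param_values.into())
            .expect("a valid $1 placeholder should be replaced");
    }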
3915
3916
    #[test]
3917
    fn test_nullable_schema_after_grouping_set() {
3918
        let schema = Schema::new(vec![
3919
            Field::new("foo", DataType::Int32, false),
3920
            Field::new("bar", DataType::Int32, false),
3921
        ]);
3922
3923
        let plan = table_scan(TableReference::none(), &schema, None)
3924
            .unwrap()
3925
            .aggregate(
3926
                vec![Expr::GroupingSet(GroupingSet::GroupingSets(vec![
3927
                    vec![col("foo")],
3928
                    vec![col("bar")],
3929
                ]))],
3930
                vec![count(lit(true))],
3931
            )
3932
            .unwrap()
3933
            .build()
3934
            .unwrap();
3935
3936
        let output_schema = plan.schema();
3937
3938
        assert!(output_schema
3939
            .field_with_name(None, "foo")
3940
            .unwrap()
3941
            .is_nullable());
3942
        assert!(output_schema
3943
            .field_with_name(None, "bar")
3944
            .unwrap()
3945
            .is_nullable());
3946
    }
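
    // Illustrative sketch, not part of the original suite: with a plain GROUP BY
    // key (no grouping sets) the key column never needs to be null-filled, so a
    // non-nullable input field is expected to stay non-nullable, in contrast to
    // the grouping-set case above.
    #[test]
    fn test_non_nullable_schema_after_plain_group_by_sketch() {
        let schema = Schema::new(vec![Field::new("foo", DataType::Int32, false)]);

        let plan = table_scan(TableReference::none(), &schema, None)
            .unwrap()
            .aggregate(vec![col("foo")], vec![count(lit(true))])
            .unwrap()
            .build()
            .unwrap();

        assert!(!plan
            .schema()
            .field_with_name(None, "foo")
            .unwrap()
            .is_nullable());
    }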
3947
3948
    #[test]
3949
    fn test_filter_is_scalar() {
3950
        // scan a table whose schema carries no unique constraint on `id`
3951
        let schema =
3952
            Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
3953
3954
        let source = Arc::new(LogicalTableSource::new(schema));
3955
        let schema = Arc::new(
3956
            DFSchema::try_from_qualified_schema(
3957
                TableReference::bare("tab"),
3958
                &source.schema(),
3959
            )
3960
            .unwrap(),
3961
        );
3962
        let scan = Arc::new(LogicalPlan::TableScan(TableScan {
3963
            table_name: TableReference::bare("tab"),
3964
            source: Arc::clone(&source) as Arc<dyn TableSource>,
3965
            projection: None,
3966
            projected_schema: Arc::clone(&schema),
3967
            filters: vec![],
3968
            fetch: None,
3969
        }));
3970
        let col = schema.field_names()[0].clone();
3971
3972
        let filter = Filter::try_new(
3973
            Expr::Column(col.into()).eq(Expr::Literal(ScalarValue::Int32(Some(1)))),
3974
            scan,
3975
        )
3976
        .unwrap();
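        // With no unique constraint on `id`, equality on it does not pin down a
        // single row, so this filter is not scalar.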
3977
        assert!(!filter.is_scalar());
3978
        let unique_schema = Arc::new(
3979
            schema
3980
                .as_ref()
3981
                .clone()
3982
                .with_functional_dependencies(
3983
                    FunctionalDependencies::new_from_constraints(
3984
                        Some(&Constraints::new_unverified(vec![Constraint::Unique(
3985
                            vec![0],
3986
                        )])),
3987
                        1,
3988
                    ),
3989
                )
3990
                .unwrap(),
3991
        );
3992
        let scan = Arc::new(LogicalPlan::TableScan(TableScan {
3993
            table_name: TableReference::bare("tab"),
3994
            source,
3995
            projection: None,
3996
            projected_schema: Arc::clone(&unique_schema),
3997
            filters: vec![],
3998
            fetch: None,
3999
        }));
4000
        let col = schema.field_names()[0].clone();
4001
4002
        let filter =
4003
            Filter::try_new(Expr::Column(col.into()).eq(lit(1i32)), scan).unwrap();
4004
        assert!(filter.is_scalar());
4005
    }
4006
4007
    #[test]
4008
    fn test_transform_explain() {
4009
        let schema = Schema::new(vec![
4010
            Field::new("foo", DataType::Int32, false),
4011
            Field::new("bar", DataType::Int32, false),
4012
        ]);
4013
4014
        let plan = table_scan(TableReference::none(), &schema, None)
4015
            .unwrap()
4016
            .explain(false, false)
4017
            .unwrap()
4018
            .build()
4019
            .unwrap();
4020
4021
        let external_filter = col("foo").eq(lit(true));
4022
4023
        // After the transformation the scan child is no longer the same plan,
4024
        // so the parent plan is rebuilt via LogicalPlan::with_new_inputs -> with_new_exprs
4025
        let plan = plan
4026
            .transform(|plan| match plan {
4027
                LogicalPlan::TableScan(table) => {
4028
                    let filter = Filter::try_new(
4029
                        external_filter.clone(),
4030
                        Arc::new(LogicalPlan::TableScan(table)),
4031
                    )
4032
                    .unwrap();
4033
                    Ok(Transformed::yes(LogicalPlan::Filter(filter)))
4034
                }
4035
                x => Ok(Transformed::no(x)),
4036
            })
4037
            .data()
4038
            .unwrap();
4039
4040
        let expected = "Explain\
4041
                        \n  Filter: foo = Boolean(true)\
4042
                        \n    TableScan: ?table?";
4043
        let actual = format!("{}", plan.display_indent());
4044
        assert_eq!(expected.to_string(), actual)
4045
    }
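
    // Illustrative sketch, not part of the original suite: a closure that
    // changes nothing reports `transformed == false` and leaves the plan as it
    // was, in contrast to the rewriting closure used above.
    #[test]
    fn test_transform_noop_sketch() {
        let plan = test_plan();
        let before = format!("{}", plan.display_indent());

        let result = plan.transform(|plan| Ok(Transformed::no(plan))).unwrap();
        assert!(!result.transformed);
        assert_eq!(before, format!("{}", result.data.display_indent()));
    }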
4046
4047
    #[test]
4048
    fn test_plan_partial_ord() {
4049
        let empty_relation = LogicalPlan::EmptyRelation(EmptyRelation {
4050
            produce_one_row: false,
4051
            schema: Arc::new(DFSchema::empty()),
4052
        });
4053
4054
        let describe_table = LogicalPlan::DescribeTable(DescribeTable {
4055
            schema: Arc::new(Schema::new(vec![Field::new(
4056
                "foo",
4057
                DataType::Int32,
4058
                false,
4059
            )])),
4060
            output_schema: DFSchemaRef::new(DFSchema::empty()),
4061
        });
4062
4063
        let describe_table_clone = LogicalPlan::DescribeTable(DescribeTable {
4064
            schema: Arc::new(Schema::new(vec![Field::new(
4065
                "foo",
4066
                DataType::Int32,
4067
                false,
4068
            )])),
4069
            output_schema: DFSchemaRef::new(DFSchema::empty()),
4070
        });
4071
4072
        assert_eq!(
4073
            empty_relation.partial_cmp(&describe_table),
4074
            Some(Ordering::Less)
4075
        );
4076
        assert_eq!(
4077
            describe_table.partial_cmp(&empty_relation),
4078
            Some(Ordering::Greater)
4079
        );
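        // `DescribeTable` is one of the variants whose contents do not define an
        // ordering, so even two identical plans compare as `None`.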
4080
        assert_eq!(describe_table.partial_cmp(&describe_table_clone), None);
4081
    }
4082
}