Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/expr/src/logical_plan/tree_node.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//!  [`TreeNode`] based visiting and rewriting for [`LogicalPlan`]s
19
//!
20
//! Visiting (read only) APIs
21
//! * [`LogicalPlan::visit`]: recursively visit the node and all of its inputs
22
//! * [`LogicalPlan::visit_with_subqueries`]: recursively visit the node and all of its inputs, including subqueries
23
//! * [`LogicalPlan::apply_children`]: recursively visit all inputs of this node
24
//! * [`LogicalPlan::apply_expressions`]: (non recursively) visit all expressions of this node
25
//! * [`LogicalPlan::apply_subqueries`]: (non recursively) visit all subqueries of this node
26
//! * [`LogicalPlan::apply_with_subqueries`]: recursively visit all inputs and embedded subqueries.
27
//!
28
//! Rewriting (update) APIs:
29
//! * [`LogicalPlan::exists`]: search for an expression in a plan
30
//! * [`LogicalPlan::rewrite`]: recursively rewrite the node and all of its inputs
31
//! * [`LogicalPlan::map_children`]: recursively rewrite all inputs of this node
32
//! * [`LogicalPlan::map_expressions`]: (non recursively) rewrite all expressions of this node
33
//! * [`LogicalPlan::map_subqueries`]: (non recursively) rewrite all subqueries of this node
34
//! * [`LogicalPlan::rewrite_with_subqueries`]: recursively rewrite the node and all of its inputs, including subqueries
35
//!
36
//! (Re)creation APIs (these require substantial cloning and thus are slow):
37
//! * [`LogicalPlan::with_new_exprs`]: Create a new plan with different expressions
38
//! * [`LogicalPlan::expressions`]: Return a copy of the plan's expressions
39
use crate::{
40
    dml::CopyTo, Aggregate, Analyze, CreateMemoryTable, CreateView, CrossJoin,
41
    DdlStatement, Distinct, DistinctOn, DmlStatement, Explain, Expr, Extension, Filter,
42
    Join, Limit, LogicalPlan, Partitioning, Prepare, Projection, RecursiveQuery,
43
    Repartition, Sort, Subquery, SubqueryAlias, TableScan, Union, Unnest,
44
    UserDefinedLogicalNode, Values, Window,
45
};
46
use std::sync::Arc;
47
48
use crate::expr::{Exists, InSubquery};
49
use crate::tree_node::{transform_sort_option_vec, transform_sort_vec};
50
use datafusion_common::tree_node::{
51
    Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion, TreeNodeRewriter,
52
    TreeNodeVisitor,
53
};
54
use datafusion_common::{
55
    internal_err, map_until_stop_and_collect, DataFusionError, Result,
56
};
57
58
impl TreeNode for LogicalPlan {
59
0
    fn apply_children<'n, F: FnMut(&'n Self) -> Result<TreeNodeRecursion>>(
60
0
        &'n self,
61
0
        f: F,
62
0
    ) -> Result<TreeNodeRecursion> {
63
0
        self.inputs().into_iter().apply_until_stop(f)
64
0
    }
65
66
    /// Applies `f` to each child (input) of this plan node, rewriting them *in place.*
67
    ///
68
    /// # Notes
69
    ///
70
    /// Inputs include ONLY direct children, not embedded `LogicalPlan`s for
71
    /// subqueries, for example such as are in [`Expr::Exists`].
72
    ///
73
    /// [`Expr::Exists`]: crate::Expr::Exists
74
0
    fn map_children<F: FnMut(Self) -> Result<Transformed<Self>>>(
75
0
        self,
76
0
        mut f: F,
77
0
    ) -> Result<Transformed<Self>> {
78
0
        Ok(match self {
79
            LogicalPlan::Projection(Projection {
80
0
                expr,
81
0
                input,
82
0
                schema,
83
0
            }) => rewrite_arc(input, f)?.update_data(|input| {
84
0
                LogicalPlan::Projection(Projection {
85
0
                    expr,
86
0
                    input,
87
0
                    schema,
88
0
                })
89
0
            }),
90
            LogicalPlan::Filter(Filter {
91
0
                predicate,
92
0
                input,
93
0
                having,
94
0
            }) => rewrite_arc(input, f)?.update_data(|input| {
95
0
                LogicalPlan::Filter(Filter {
96
0
                    predicate,
97
0
                    input,
98
0
                    having,
99
0
                })
100
0
            }),
101
            LogicalPlan::Repartition(Repartition {
102
0
                input,
103
0
                partitioning_scheme,
104
0
            }) => rewrite_arc(input, f)?.update_data(|input| {
105
0
                LogicalPlan::Repartition(Repartition {
106
0
                    input,
107
0
                    partitioning_scheme,
108
0
                })
109
0
            }),
110
            LogicalPlan::Window(Window {
111
0
                input,
112
0
                window_expr,
113
0
                schema,
114
0
            }) => rewrite_arc(input, f)?.update_data(|input| {
115
0
                LogicalPlan::Window(Window {
116
0
                    input,
117
0
                    window_expr,
118
0
                    schema,
119
0
                })
120
0
            }),
121
            LogicalPlan::Aggregate(Aggregate {
122
0
                input,
123
0
                group_expr,
124
0
                aggr_expr,
125
0
                schema,
126
0
            }) => rewrite_arc(input, f)?.update_data(|input| {
127
0
                LogicalPlan::Aggregate(Aggregate {
128
0
                    input,
129
0
                    group_expr,
130
0
                    aggr_expr,
131
0
                    schema,
132
0
                })
133
0
            }),
134
0
            LogicalPlan::Sort(Sort { expr, input, fetch }) => rewrite_arc(input, f)?
135
0
                .update_data(|input| LogicalPlan::Sort(Sort { expr, input, fetch })),
136
            LogicalPlan::Join(Join {
137
0
                left,
138
0
                right,
139
0
                on,
140
0
                filter,
141
0
                join_type,
142
0
                join_constraint,
143
0
                schema,
144
0
                null_equals_null,
145
0
            }) => map_until_stop_and_collect!(
146
0
                rewrite_arc(left, &mut f),
147
0
                right,
148
0
                rewrite_arc(right, &mut f)
149
0
            )?
150
0
            .update_data(|(left, right)| {
151
0
                LogicalPlan::Join(Join {
152
0
                    left,
153
0
                    right,
154
0
                    on,
155
0
                    filter,
156
0
                    join_type,
157
0
                    join_constraint,
158
0
                    schema,
159
0
                    null_equals_null,
160
0
                })
161
0
            }),
162
            LogicalPlan::CrossJoin(CrossJoin {
163
0
                left,
164
0
                right,
165
0
                schema,
166
0
            }) => map_until_stop_and_collect!(
167
0
                rewrite_arc(left, &mut f),
168
0
                right,
169
0
                rewrite_arc(right, &mut f)
170
0
            )?
171
0
            .update_data(|(left, right)| {
172
0
                LogicalPlan::CrossJoin(CrossJoin {
173
0
                    left,
174
0
                    right,
175
0
                    schema,
176
0
                })
177
0
            }),
178
0
            LogicalPlan::Limit(Limit { skip, fetch, input }) => rewrite_arc(input, f)?
179
0
                .update_data(|input| LogicalPlan::Limit(Limit { skip, fetch, input })),
180
            LogicalPlan::Subquery(Subquery {
181
0
                subquery,
182
0
                outer_ref_columns,
183
0
            }) => rewrite_arc(subquery, f)?.update_data(|subquery| {
184
0
                LogicalPlan::Subquery(Subquery {
185
0
                    subquery,
186
0
                    outer_ref_columns,
187
0
                })
188
0
            }),
189
            LogicalPlan::SubqueryAlias(SubqueryAlias {
190
0
                input,
191
0
                alias,
192
0
                schema,
193
0
            }) => rewrite_arc(input, f)?.update_data(|input| {
194
0
                LogicalPlan::SubqueryAlias(SubqueryAlias {
195
0
                    input,
196
0
                    alias,
197
0
                    schema,
198
0
                })
199
0
            }),
200
0
            LogicalPlan::Extension(extension) => rewrite_extension_inputs(extension, f)?
201
0
                .update_data(LogicalPlan::Extension),
202
0
            LogicalPlan::Union(Union { inputs, schema }) => rewrite_arcs(inputs, f)?
203
0
                .update_data(|inputs| LogicalPlan::Union(Union { inputs, schema })),
204
0
            LogicalPlan::Distinct(distinct) => match distinct {
205
0
                Distinct::All(input) => rewrite_arc(input, f)?.update_data(Distinct::All),
206
                Distinct::On(DistinctOn {
207
0
                    on_expr,
208
0
                    select_expr,
209
0
                    sort_expr,
210
0
                    input,
211
0
                    schema,
212
0
                }) => rewrite_arc(input, f)?.update_data(|input| {
213
0
                    Distinct::On(DistinctOn {
214
0
                        on_expr,
215
0
                        select_expr,
216
0
                        sort_expr,
217
0
                        input,
218
0
                        schema,
219
0
                    })
220
0
                }),
221
            }
222
0
            .update_data(LogicalPlan::Distinct),
223
            LogicalPlan::Explain(Explain {
224
0
                verbose,
225
0
                plan,
226
0
                stringified_plans,
227
0
                schema,
228
0
                logical_optimization_succeeded,
229
0
            }) => rewrite_arc(plan, f)?.update_data(|plan| {
230
0
                LogicalPlan::Explain(Explain {
231
0
                    verbose,
232
0
                    plan,
233
0
                    stringified_plans,
234
0
                    schema,
235
0
                    logical_optimization_succeeded,
236
0
                })
237
0
            }),
238
            LogicalPlan::Analyze(Analyze {
239
0
                verbose,
240
0
                input,
241
0
                schema,
242
0
            }) => rewrite_arc(input, f)?.update_data(|input| {
243
0
                LogicalPlan::Analyze(Analyze {
244
0
                    verbose,
245
0
                    input,
246
0
                    schema,
247
0
                })
248
0
            }),
249
            LogicalPlan::Dml(DmlStatement {
250
0
                table_name,
251
0
                table_schema,
252
0
                op,
253
0
                input,
254
0
                output_schema,
255
0
            }) => rewrite_arc(input, f)?.update_data(|input| {
256
0
                LogicalPlan::Dml(DmlStatement {
257
0
                    table_name,
258
0
                    table_schema,
259
0
                    op,
260
0
                    input,
261
0
                    output_schema,
262
0
                })
263
0
            }),
264
            LogicalPlan::Copy(CopyTo {
265
0
                input,
266
0
                output_url,
267
0
                partition_by,
268
0
                file_type,
269
0
                options,
270
0
            }) => rewrite_arc(input, f)?.update_data(|input| {
271
0
                LogicalPlan::Copy(CopyTo {
272
0
                    input,
273
0
                    output_url,
274
0
                    partition_by,
275
0
                    file_type,
276
0
                    options,
277
0
                })
278
0
            }),
279
0
            LogicalPlan::Ddl(ddl) => {
280
0
                match ddl {
281
                    DdlStatement::CreateMemoryTable(CreateMemoryTable {
282
0
                        name,
283
0
                        constraints,
284
0
                        input,
285
0
                        if_not_exists,
286
0
                        or_replace,
287
0
                        column_defaults,
288
0
                    }) => rewrite_arc(input, f)?.update_data(|input| {
289
0
                        DdlStatement::CreateMemoryTable(CreateMemoryTable {
290
0
                            name,
291
0
                            constraints,
292
0
                            input,
293
0
                            if_not_exists,
294
0
                            or_replace,
295
0
                            column_defaults,
296
0
                        })
297
0
                    }),
298
                    DdlStatement::CreateView(CreateView {
299
0
                        name,
300
0
                        input,
301
0
                        or_replace,
302
0
                        definition,
303
0
                    }) => rewrite_arc(input, f)?.update_data(|input| {
304
0
                        DdlStatement::CreateView(CreateView {
305
0
                            name,
306
0
                            input,
307
0
                            or_replace,
308
0
                            definition,
309
0
                        })
310
0
                    }),
311
                    // no inputs in these statements
312
                    DdlStatement::CreateExternalTable(_)
313
                    | DdlStatement::CreateCatalogSchema(_)
314
                    | DdlStatement::CreateCatalog(_)
315
                    | DdlStatement::CreateIndex(_)
316
                    | DdlStatement::DropTable(_)
317
                    | DdlStatement::DropView(_)
318
                    | DdlStatement::DropCatalogSchema(_)
319
                    | DdlStatement::CreateFunction(_)
320
0
                    | DdlStatement::DropFunction(_) => Transformed::no(ddl),
321
                }
322
0
                .update_data(LogicalPlan::Ddl)
323
            }
324
            LogicalPlan::Unnest(Unnest {
325
0
                input,
326
0
                exec_columns: input_columns,
327
0
                list_type_columns,
328
0
                struct_type_columns,
329
0
                dependency_indices,
330
0
                schema,
331
0
                options,
332
0
            }) => rewrite_arc(input, f)?.update_data(|input| {
333
0
                LogicalPlan::Unnest(Unnest {
334
0
                    input,
335
0
                    exec_columns: input_columns,
336
0
                    dependency_indices,
337
0
                    list_type_columns,
338
0
                    struct_type_columns,
339
0
                    schema,
340
0
                    options,
341
0
                })
342
0
            }),
343
            LogicalPlan::Prepare(Prepare {
344
0
                name,
345
0
                data_types,
346
0
                input,
347
0
            }) => rewrite_arc(input, f)?.update_data(|input| {
348
0
                LogicalPlan::Prepare(Prepare {
349
0
                    name,
350
0
                    data_types,
351
0
                    input,
352
0
                })
353
0
            }),
354
            LogicalPlan::RecursiveQuery(RecursiveQuery {
355
0
                name,
356
0
                static_term,
357
0
                recursive_term,
358
0
                is_distinct,
359
0
            }) => map_until_stop_and_collect!(
360
0
                rewrite_arc(static_term, &mut f),
361
0
                recursive_term,
362
0
                rewrite_arc(recursive_term, &mut f)
363
0
            )?
364
0
            .update_data(|(static_term, recursive_term)| {
365
0
                LogicalPlan::RecursiveQuery(RecursiveQuery {
366
0
                    name,
367
0
                    static_term,
368
0
                    recursive_term,
369
0
                    is_distinct,
370
0
                })
371
0
            }),
372
            // plans without inputs
373
            LogicalPlan::TableScan { .. }
374
            | LogicalPlan::Statement { .. }
375
            | LogicalPlan::EmptyRelation { .. }
376
            | LogicalPlan::Values { .. }
377
0
            | LogicalPlan::DescribeTable(_) => Transformed::no(self),
378
        })
379
0
    }
380
}
381
382
/// Applies `f` to rewrite a `Arc<LogicalPlan>` without copying, if possible
383
0
fn rewrite_arc<F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>>(
384
0
    plan: Arc<LogicalPlan>,
385
0
    mut f: F,
386
0
) -> Result<Transformed<Arc<LogicalPlan>>> {
387
0
    f(Arc::unwrap_or_clone(plan))?.map_data(|new_plan| Ok(Arc::new(new_plan)))
388
0
}
389
390
/// rewrite a `Vec` of `Arc<LogicalPlan>` without copying, if possible
391
0
fn rewrite_arcs<F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>>(
392
0
    input_plans: Vec<Arc<LogicalPlan>>,
393
0
    mut f: F,
394
0
) -> Result<Transformed<Vec<Arc<LogicalPlan>>>> {
395
0
    input_plans
396
0
        .into_iter()
397
0
        .map_until_stop_and_collect(|plan| rewrite_arc(plan, &mut f))
398
0
}
399
400
/// Rewrites all inputs for an Extension node "in place"
401
/// (it currently has to copy values because there are no APIs for in place modification)
402
///
403
/// Should be removed when we have an API for in place modifications of the
404
/// extension to avoid these copies
405
0
fn rewrite_extension_inputs<F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>>(
406
0
    extension: Extension,
407
0
    f: F,
408
0
) -> Result<Transformed<Extension>> {
409
0
    let Extension { node } = extension;
410
0
411
0
    node.inputs()
412
0
        .into_iter()
413
0
        .cloned()
414
0
        .map_until_stop_and_collect(f)?
415
0
        .map_data(|new_inputs| {
416
0
            let exprs = node.expressions();
417
0
            Ok(Extension {
418
0
                node: node.with_exprs_and_inputs(exprs, new_inputs)?,
419
            })
420
0
        })
421
0
}
422
423
/// Chains the stages of one step of a combined transforming traversal.
///
/// Expands to: evaluate `$DOWN` (propagating errors with `?`), rewrite the
/// node's subqueries with `$CHILD` through `transform_children`, rewrite its
/// inputs with `$CHILD` through `transform_sibling`, and finally pass the
/// result to `$UP` through `transform_parent`. Those combinators determine
/// whether each later stage continues.
macro_rules! handle_transform_recursion {
    ($DOWN:expr, $CHILD:expr, $UP:expr) => {{
        $DOWN?
            .transform_children(|node| node.map_subqueries($CHILD))?
            .transform_sibling(|node| node.map_children($CHILD))?
            .transform_parent($UP)
    }};
}
433
434
impl LogicalPlan {
435
    /// Calls `f` on all expressions in the current `LogicalPlan` node.
436
    ///
437
    /// # Notes
438
    /// * Similar to [`TreeNode::apply`] but for this node's expressions.
439
    /// * Does not include expressions in input `LogicalPlan` nodes
440
    /// * Visits only the top level expressions (Does not recurse into each expression)
441
0
    pub fn apply_expressions<F: FnMut(&Expr) -> Result<TreeNodeRecursion>>(
442
0
        &self,
443
0
        mut f: F,
444
0
    ) -> Result<TreeNodeRecursion> {
445
0
        match self {
446
0
            LogicalPlan::Projection(Projection { expr, .. }) => {
447
0
                expr.iter().apply_until_stop(f)
448
            }
449
0
            LogicalPlan::Values(Values { values, .. }) => values
450
0
                .iter()
451
0
                .apply_until_stop(|value| value.iter().apply_until_stop(&mut f)),
452
0
            LogicalPlan::Filter(Filter { predicate, .. }) => f(predicate),
453
            LogicalPlan::Repartition(Repartition {
454
0
                partitioning_scheme,
455
0
                ..
456
0
            }) => match partitioning_scheme {
457
0
                Partitioning::Hash(expr, _) | Partitioning::DistributeBy(expr) => {
458
0
                    expr.iter().apply_until_stop(f)
459
                }
460
0
                Partitioning::RoundRobinBatch(_) => Ok(TreeNodeRecursion::Continue),
461
            },
462
0
            LogicalPlan::Window(Window { window_expr, .. }) => {
463
0
                window_expr.iter().apply_until_stop(f)
464
            }
465
            LogicalPlan::Aggregate(Aggregate {
466
0
                group_expr,
467
0
                aggr_expr,
468
0
                ..
469
0
            }) => group_expr
470
0
                .iter()
471
0
                .chain(aggr_expr.iter())
472
0
                .apply_until_stop(f),
473
            // There are two part of expression for join, equijoin(on) and non-equijoin(filter).
474
            // 1. the first part is `on.len()` equijoin expressions, and the struct of each expr is `left-on = right-on`.
475
            // 2. the second part is non-equijoin(filter).
476
0
            LogicalPlan::Join(Join { on, filter, .. }) => {
477
0
                on.iter()
478
0
                    // TODO: why we need to create an `Expr::eq`? Cloning `Expr` is costly...
479
0
                    // it not ideal to create an expr here to analyze them, but could cache it on the Join itself
480
0
                    .map(|(l, r)| Expr::eq(l.clone(), r.clone()))
481
0
                    .apply_until_stop(|e| f(&e))?
482
0
                    .visit_sibling(|| filter.iter().apply_until_stop(f))
483
            }
484
0
            LogicalPlan::Sort(Sort { expr, .. }) => {
485
0
                expr.iter().apply_until_stop(|sort| f(&sort.expr))
486
            }
487
0
            LogicalPlan::Extension(extension) => {
488
0
                // would be nice to avoid this copy -- maybe can
489
0
                // update extension to just observer Exprs
490
0
                extension.node.expressions().iter().apply_until_stop(f)
491
            }
492
0
            LogicalPlan::TableScan(TableScan { filters, .. }) => {
493
0
                filters.iter().apply_until_stop(f)
494
            }
495
0
            LogicalPlan::Unnest(unnest) => {
496
0
                let columns = unnest.exec_columns.clone();
497
0
498
0
                let exprs = columns
499
0
                    .iter()
500
0
                    .map(|(c, _)| Expr::Column(c.clone()))
501
0
                    .collect::<Vec<_>>();
502
0
                exprs.iter().apply_until_stop(f)
503
            }
504
            LogicalPlan::Distinct(Distinct::On(DistinctOn {
505
0
                on_expr,
506
0
                select_expr,
507
0
                sort_expr,
508
0
                ..
509
0
            })) => on_expr
510
0
                .iter()
511
0
                .chain(select_expr.iter())
512
0
                .chain(sort_expr.iter().flatten().map(|sort| &sort.expr))
513
0
                .apply_until_stop(f),
514
            // plans without expressions
515
            LogicalPlan::EmptyRelation(_)
516
            | LogicalPlan::RecursiveQuery(_)
517
            | LogicalPlan::Subquery(_)
518
            | LogicalPlan::SubqueryAlias(_)
519
            | LogicalPlan::Limit(_)
520
            | LogicalPlan::Statement(_)
521
            | LogicalPlan::CrossJoin(_)
522
            | LogicalPlan::Analyze(_)
523
            | LogicalPlan::Explain(_)
524
            | LogicalPlan::Union(_)
525
            | LogicalPlan::Distinct(Distinct::All(_))
526
            | LogicalPlan::Dml(_)
527
            | LogicalPlan::Ddl(_)
528
            | LogicalPlan::Copy(_)
529
            | LogicalPlan::DescribeTable(_)
530
0
            | LogicalPlan::Prepare(_) => Ok(TreeNodeRecursion::Continue),
531
        }
532
0
    }
533
534
    /// Rewrites all expressions in the current `LogicalPlan` node using `f`.
535
    ///
536
    /// Returns the current node.
537
    ///
538
    /// # Notes
539
    /// * Similar to [`TreeNode::map_children`] but for this node's expressions.
540
    /// * Visits only the top level expressions (Does not recurse into each expression)
541
0
    pub fn map_expressions<F: FnMut(Expr) -> Result<Transformed<Expr>>>(
542
0
        self,
543
0
        mut f: F,
544
0
    ) -> Result<Transformed<Self>> {
545
0
        Ok(match self {
546
            LogicalPlan::Projection(Projection {
547
0
                expr,
548
0
                input,
549
0
                schema,
550
0
            }) => expr
551
0
                .into_iter()
552
0
                .map_until_stop_and_collect(f)?
553
0
                .update_data(|expr| {
554
0
                    LogicalPlan::Projection(Projection {
555
0
                        expr,
556
0
                        input,
557
0
                        schema,
558
0
                    })
559
0
                }),
560
0
            LogicalPlan::Values(Values { schema, values }) => values
561
0
                .into_iter()
562
0
                .map_until_stop_and_collect(|value| {
563
0
                    value.into_iter().map_until_stop_and_collect(&mut f)
564
0
                })?
565
0
                .update_data(|values| LogicalPlan::Values(Values { schema, values })),
566
            LogicalPlan::Filter(Filter {
567
0
                predicate,
568
0
                input,
569
0
                having,
570
0
            }) => f(predicate)?.update_data(|predicate| {
571
0
                LogicalPlan::Filter(Filter {
572
0
                    predicate,
573
0
                    input,
574
0
                    having,
575
0
                })
576
0
            }),
577
            LogicalPlan::Repartition(Repartition {
578
0
                input,
579
0
                partitioning_scheme,
580
0
            }) => match partitioning_scheme {
581
0
                Partitioning::Hash(expr, usize) => expr
582
0
                    .into_iter()
583
0
                    .map_until_stop_and_collect(f)?
584
0
                    .update_data(|expr| Partitioning::Hash(expr, usize)),
585
0
                Partitioning::DistributeBy(expr) => expr
586
0
                    .into_iter()
587
0
                    .map_until_stop_and_collect(f)?
588
0
                    .update_data(Partitioning::DistributeBy),
589
0
                Partitioning::RoundRobinBatch(_) => Transformed::no(partitioning_scheme),
590
            }
591
0
            .update_data(|partitioning_scheme| {
592
0
                LogicalPlan::Repartition(Repartition {
593
0
                    input,
594
0
                    partitioning_scheme,
595
0
                })
596
0
            }),
597
            LogicalPlan::Window(Window {
598
0
                input,
599
0
                window_expr,
600
0
                schema,
601
0
            }) => window_expr
602
0
                .into_iter()
603
0
                .map_until_stop_and_collect(f)?
604
0
                .update_data(|window_expr| {
605
0
                    LogicalPlan::Window(Window {
606
0
                        input,
607
0
                        window_expr,
608
0
                        schema,
609
0
                    })
610
0
                }),
611
            LogicalPlan::Aggregate(Aggregate {
612
0
                input,
613
0
                group_expr,
614
0
                aggr_expr,
615
0
                schema,
616
0
            }) => map_until_stop_and_collect!(
617
0
                group_expr.into_iter().map_until_stop_and_collect(&mut f),
618
0
                aggr_expr,
619
0
                aggr_expr.into_iter().map_until_stop_and_collect(&mut f)
620
0
            )?
621
0
            .update_data(|(group_expr, aggr_expr)| {
622
0
                LogicalPlan::Aggregate(Aggregate {
623
0
                    input,
624
0
                    group_expr,
625
0
                    aggr_expr,
626
0
                    schema,
627
0
                })
628
0
            }),
629
630
            // There are two part of expression for join, equijoin(on) and non-equijoin(filter).
631
            // 1. the first part is `on.len()` equijoin expressions, and the struct of each expr is `left-on = right-on`.
632
            // 2. the second part is non-equijoin(filter).
633
            LogicalPlan::Join(Join {
634
0
                left,
635
0
                right,
636
0
                on,
637
0
                filter,
638
0
                join_type,
639
0
                join_constraint,
640
0
                schema,
641
0
                null_equals_null,
642
0
            }) => map_until_stop_and_collect!(
643
0
                on.into_iter().map_until_stop_and_collect(
644
0
                    |on| map_until_stop_and_collect!(f(on.0), on.1, f(on.1))
645
0
                ),
646
0
                filter,
647
0
                filter.map_or(Ok::<_, DataFusionError>(Transformed::no(None)), |e| {
648
0
                    Ok(f(e)?.update_data(Some))
649
0
                })
650
0
            )?
651
0
            .update_data(|(on, filter)| {
652
0
                LogicalPlan::Join(Join {
653
0
                    left,
654
0
                    right,
655
0
                    on,
656
0
                    filter,
657
0
                    join_type,
658
0
                    join_constraint,
659
0
                    schema,
660
0
                    null_equals_null,
661
0
                })
662
0
            }),
663
0
            LogicalPlan::Sort(Sort { expr, input, fetch }) => {
664
0
                transform_sort_vec(expr, &mut f)?
665
0
                    .update_data(|expr| LogicalPlan::Sort(Sort { expr, input, fetch }))
666
            }
667
0
            LogicalPlan::Extension(Extension { node }) => {
668
                // would be nice to avoid this copy -- maybe can
669
                // update extension to just observer Exprs
670
0
                let exprs = node
671
0
                    .expressions()
672
0
                    .into_iter()
673
0
                    .map_until_stop_and_collect(f)?;
674
0
                let plan = LogicalPlan::Extension(Extension {
675
0
                    node: UserDefinedLogicalNode::with_exprs_and_inputs(
676
0
                        node.as_ref(),
677
0
                        exprs.data,
678
0
                        node.inputs().into_iter().cloned().collect::<Vec<_>>(),
679
0
                    )?,
680
                });
681
0
                Transformed::new(plan, exprs.transformed, exprs.tnr)
682
            }
683
            LogicalPlan::TableScan(TableScan {
684
0
                table_name,
685
0
                source,
686
0
                projection,
687
0
                projected_schema,
688
0
                filters,
689
0
                fetch,
690
0
            }) => filters
691
0
                .into_iter()
692
0
                .map_until_stop_and_collect(f)?
693
0
                .update_data(|filters| {
694
0
                    LogicalPlan::TableScan(TableScan {
695
0
                        table_name,
696
0
                        source,
697
0
                        projection,
698
0
                        projected_schema,
699
0
                        filters,
700
0
                        fetch,
701
0
                    })
702
0
                }),
703
            LogicalPlan::Distinct(Distinct::On(DistinctOn {
704
0
                on_expr,
705
0
                select_expr,
706
0
                sort_expr,
707
0
                input,
708
0
                schema,
709
0
            })) => map_until_stop_and_collect!(
710
0
                on_expr.into_iter().map_until_stop_and_collect(&mut f),
711
0
                select_expr,
712
0
                select_expr.into_iter().map_until_stop_and_collect(&mut f),
713
0
                sort_expr,
714
0
                transform_sort_option_vec(sort_expr, &mut f)
715
0
            )?
716
0
            .update_data(|(on_expr, select_expr, sort_expr)| {
717
0
                LogicalPlan::Distinct(Distinct::On(DistinctOn {
718
0
                    on_expr,
719
0
                    select_expr,
720
0
                    sort_expr,
721
0
                    input,
722
0
                    schema,
723
0
                }))
724
0
            }),
725
            // plans without expressions
726
            LogicalPlan::EmptyRelation(_)
727
            | LogicalPlan::Unnest(_)
728
            | LogicalPlan::RecursiveQuery(_)
729
            | LogicalPlan::Subquery(_)
730
            | LogicalPlan::SubqueryAlias(_)
731
            | LogicalPlan::Limit(_)
732
            | LogicalPlan::Statement(_)
733
            | LogicalPlan::CrossJoin(_)
734
            | LogicalPlan::Analyze(_)
735
            | LogicalPlan::Explain(_)
736
            | LogicalPlan::Union(_)
737
            | LogicalPlan::Distinct(Distinct::All(_))
738
            | LogicalPlan::Dml(_)
739
            | LogicalPlan::Ddl(_)
740
            | LogicalPlan::Copy(_)
741
            | LogicalPlan::DescribeTable(_)
742
0
            | LogicalPlan::Prepare(_) => Transformed::no(self),
743
        })
744
0
    }
745
746
    /// Visits a plan similarly to [`Self::visit`], including subqueries that
747
    /// may appear in expressions such as `IN (SELECT ...)`.
748
0
    pub fn visit_with_subqueries<V: for<'n> TreeNodeVisitor<'n, Node = Self>>(
749
0
        &self,
750
0
        visitor: &mut V,
751
0
    ) -> Result<TreeNodeRecursion> {
752
0
        visitor
753
0
            .f_down(self)?
754
0
            .visit_children(|| {
755
0
                self.apply_subqueries(|c| c.visit_with_subqueries(visitor))
756
0
            })?
757
0
            .visit_sibling(|| self.apply_children(|c| c.visit_with_subqueries(visitor)))?
758
0
            .visit_parent(|| visitor.f_up(self))
759
0
    }
760
761
    /// Similarly to [`Self::rewrite`], rewrites this node and its inputs using `rewriter`,
762
    /// including subqueries that may appear in expressions such as `IN (SELECT
763
    /// ...)`.
764
0
    pub fn rewrite_with_subqueries<R: TreeNodeRewriter<Node = Self>>(
765
0
        self,
766
0
        rewriter: &mut R,
767
0
    ) -> Result<Transformed<Self>> {
768
0
        handle_transform_recursion!(
769
0
            rewriter.f_down(self),
770
0
            |c| c.rewrite_with_subqueries(rewriter),
771
0
            |n| rewriter.f_up(n)
772
0
        )
773
0
    }
774
775
    /// Similarly to [`Self::apply`], calls `f` on this node and all its inputs,
776
    /// including subqueries that may appear in expressions such as `IN (SELECT
777
    /// ...)`.
778
0
    pub fn apply_with_subqueries<F: FnMut(&Self) -> Result<TreeNodeRecursion>>(
779
0
        &self,
780
0
        mut f: F,
781
0
    ) -> Result<TreeNodeRecursion> {
782
0
        fn apply_with_subqueries_impl<
783
0
            F: FnMut(&LogicalPlan) -> Result<TreeNodeRecursion>,
784
0
        >(
785
0
            node: &LogicalPlan,
786
0
            f: &mut F,
787
0
        ) -> Result<TreeNodeRecursion> {
788
0
            f(node)?
789
0
                .visit_children(|| {
790
0
                    node.apply_subqueries(|c| apply_with_subqueries_impl(c, f))
791
0
                })?
792
0
                .visit_sibling(|| {
793
0
                    node.apply_children(|c| apply_with_subqueries_impl(c, f))
794
0
                })
795
0
        }
796
797
0
        apply_with_subqueries_impl(self, &mut f)
798
0
    }
799
800
    /// Similarly to [`Self::transform`], rewrites this node and its inputs using `f`,
801
    /// including subqueries that may appear in expressions such as `IN (SELECT
802
    /// ...)`.
803
0
    pub fn transform_with_subqueries<F: FnMut(Self) -> Result<Transformed<Self>>>(
804
0
        self,
805
0
        f: F,
806
0
    ) -> Result<Transformed<Self>> {
807
0
        self.transform_up_with_subqueries(f)
808
0
    }
809
810
    /// Similarly to [`Self::transform_down`], rewrites this node and its inputs using `f`,
811
    /// including subqueries that may appear in expressions such as `IN (SELECT
812
    /// ...)`.
813
0
    pub fn transform_down_with_subqueries<F: FnMut(Self) -> Result<Transformed<Self>>>(
814
0
        self,
815
0
        mut f: F,
816
0
    ) -> Result<Transformed<Self>> {
817
0
        fn transform_down_with_subqueries_impl<
818
0
            F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>,
819
0
        >(
820
0
            node: LogicalPlan,
821
0
            f: &mut F,
822
0
        ) -> Result<Transformed<LogicalPlan>> {
823
0
            f(node)?
824
0
                .transform_children(|n| {
825
0
                    n.map_subqueries(|c| transform_down_with_subqueries_impl(c, f))
826
0
                })?
827
0
                .transform_sibling(|n| {
828
0
                    n.map_children(|c| transform_down_with_subqueries_impl(c, f))
829
0
                })
830
0
        }
831
832
0
        transform_down_with_subqueries_impl(self, &mut f)
833
0
    }
834
835
    /// Similarly to [`Self::transform_up`], rewrites this node and its inputs using `f`,
836
    /// including subqueries that may appear in expressions such as `IN (SELECT
837
    /// ...)`.
838
0
    pub fn transform_up_with_subqueries<F: FnMut(Self) -> Result<Transformed<Self>>>(
839
0
        self,
840
0
        mut f: F,
841
0
    ) -> Result<Transformed<Self>> {
842
0
        fn transform_up_with_subqueries_impl<
843
0
            F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>,
844
0
        >(
845
0
            node: LogicalPlan,
846
0
            f: &mut F,
847
0
        ) -> Result<Transformed<LogicalPlan>> {
848
0
            node.map_subqueries(|c| transform_up_with_subqueries_impl(c, f))?
849
0
                .transform_sibling(|n| {
850
0
                    n.map_children(|c| transform_up_with_subqueries_impl(c, f))
851
0
                })?
852
0
                .transform_parent(f)
853
0
        }
854
855
0
        transform_up_with_subqueries_impl(self, &mut f)
856
0
    }
857
858
    /// Similarly to [`Self::transform_down_up`], rewrites this node and its inputs using `f_down` and `f_up`,
859
    /// including subqueries that may appear in expressions such as `IN (SELECT
860
    /// ...)`.
861
0
    pub fn transform_down_up_with_subqueries<
862
0
        FD: FnMut(Self) -> Result<Transformed<Self>>,
863
0
        FU: FnMut(Self) -> Result<Transformed<Self>>,
864
0
    >(
865
0
        self,
866
0
        mut f_down: FD,
867
0
        mut f_up: FU,
868
0
    ) -> Result<Transformed<Self>> {
869
0
        fn transform_down_up_with_subqueries_impl<
870
0
            FD: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>,
871
0
            FU: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>,
872
0
        >(
873
0
            node: LogicalPlan,
874
0
            f_down: &mut FD,
875
0
            f_up: &mut FU,
876
0
        ) -> Result<Transformed<LogicalPlan>> {
877
0
            handle_transform_recursion!(
878
0
                f_down(node),
879
0
                |c| transform_down_up_with_subqueries_impl(c, f_down, f_up),
880
0
                f_up
881
            )
882
0
        }
883
884
0
        transform_down_up_with_subqueries_impl(self, &mut f_down, &mut f_up)
885
0
    }
886
887
    /// Similarly to [`Self::apply_children`], calls `f` on every subquery that
888
    /// appears in this node's expressions, such as `IN (SELECT ...)`. It does
889
    /// not call `f` on this node itself or on its inputs.
890
0
    pub fn apply_subqueries<F: FnMut(&Self) -> Result<TreeNodeRecursion>>(
891
0
        &self,
892
0
        mut f: F,
893
0
    ) -> Result<TreeNodeRecursion> {
894
0
        self.apply_expressions(|expr| {
895
0
            expr.apply(|expr| match expr {
896
0
                Expr::Exists(Exists { subquery, .. })
897
0
                | Expr::InSubquery(InSubquery { subquery, .. })
898
0
                | Expr::ScalarSubquery(subquery) => {
899
                    // use a synthetic plan so the collector sees a
900
                    // LogicalPlan::Subquery (even though it is
901
                    // actually a Subquery alias)
902
0
                    f(&LogicalPlan::Subquery(subquery.clone()))
903
                }
904
0
                _ => Ok(TreeNodeRecursion::Continue),
905
0
            })
906
0
        })
907
0
    }
908
909
    /// Similarly to [`Self::map_children`], rewrites all subqueries that may
910
    /// appear in expressions such as `IN (SELECT ...)` using `f`.
911
    ///
912
    /// Returns the rewritten node; errors if `f` returns anything but a [`LogicalPlan::Subquery`].
913
0
    pub fn map_subqueries<F: FnMut(Self) -> Result<Transformed<Self>>>(
914
0
        self,
915
0
        mut f: F,
916
0
    ) -> Result<Transformed<Self>> {
917
0
        self.map_expressions(|expr| {
918
0
            expr.transform_down(|expr| match expr {
919
0
                Expr::Exists(Exists { subquery, negated }) => {
920
0
                    f(LogicalPlan::Subquery(subquery))?.map_data(|s| match s {
921
0
                        LogicalPlan::Subquery(subquery) => {
922
0
                            Ok(Expr::Exists(Exists { subquery, negated }))
923
                        }
924
0
                        _ => internal_err!("Transformation should return Subquery"),
925
0
                    })
926
                }
927
                Expr::InSubquery(InSubquery {
928
0
                    expr,
929
0
                    subquery,
930
0
                    negated,
931
0
                }) => f(LogicalPlan::Subquery(subquery))?.map_data(|s| match s {
932
0
                    LogicalPlan::Subquery(subquery) => Ok(Expr::InSubquery(InSubquery {
933
0
                        expr,
934
0
                        subquery,
935
0
                        negated,
936
0
                    })),
937
0
                    _ => internal_err!("Transformation should return Subquery"),
938
0
                }),
939
0
                Expr::ScalarSubquery(subquery) => f(LogicalPlan::Subquery(subquery))?
940
0
                    .map_data(|s| match s {
941
0
                        LogicalPlan::Subquery(subquery) => {
942
0
                            Ok(Expr::ScalarSubquery(subquery))
943
                        }
944
0
                        _ => internal_err!("Transformation should return Subquery"),
945
0
                    }),
946
0
                _ => Ok(Transformed::no(expr)),
947
0
            })
948
0
        })
949
0
    }
950
}