/Users/andrewlamb/Software/datafusion/datafusion/expr/src/logical_plan/tree_node.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! [`TreeNode`] based visiting and rewriting for [`LogicalPlan`]s |
19 | | //! |
20 | | //! Visiting (read only) APIs |
21 | | //! * [`LogicalPlan::visit`]: recursively visit the node and all of its inputs |
22 | | //! * [`LogicalPlan::visit_with_subqueries`]: recursively visit the node and all of its inputs, including subqueries |
23 | | //! * [`LogicalPlan::apply_children`]: (non recursively) visit all inputs of this node |
24 | | //! * [`LogicalPlan::apply_expressions`]: (non recursively) visit all expressions of this node |
25 | | //! * [`LogicalPlan::apply_subqueries`]: (non recursively) visit all subqueries of this node |
26 | | //! * [`LogicalPlan::apply_with_subqueries`]: recursively visit all inputs and embedded subqueries. |
27 | | //! |
28 | | //! Rewriting (update) APIs: |
29 | | //! * [`LogicalPlan::exists`]: search for an expression in a plan |
30 | | //! * [`LogicalPlan::rewrite`]: recursively rewrite the node and all of its inputs |
31 | | //! * [`LogicalPlan::map_children`]: (non recursively) rewrite all inputs of this node |
32 | | //! * [`LogicalPlan::map_expressions`]: (non recursively) rewrite all expressions of this node |
33 | | //! * [`LogicalPlan::map_subqueries`]: (non recursively) rewrite all subqueries of this node |
34 | | //! * [`LogicalPlan::rewrite_with_subqueries`]: recursively rewrite the node and all of its inputs, including subqueries |
35 | | //! |
36 | | //! (Re)creation APIs (these require substantial cloning and thus are slow): |
37 | | //! * [`LogicalPlan::with_new_exprs`]: Create a new plan with different expressions |
38 | | //! * [`LogicalPlan::expressions`]: Return a copy of the plan's expressions |
39 | | use crate::{ |
40 | | dml::CopyTo, Aggregate, Analyze, CreateMemoryTable, CreateView, CrossJoin, |
41 | | DdlStatement, Distinct, DistinctOn, DmlStatement, Explain, Expr, Extension, Filter, |
42 | | Join, Limit, LogicalPlan, Partitioning, Prepare, Projection, RecursiveQuery, |
43 | | Repartition, Sort, Subquery, SubqueryAlias, TableScan, Union, Unnest, |
44 | | UserDefinedLogicalNode, Values, Window, |
45 | | }; |
46 | | use std::sync::Arc; |
47 | | |
48 | | use crate::expr::{Exists, InSubquery}; |
49 | | use crate::tree_node::{transform_sort_option_vec, transform_sort_vec}; |
50 | | use datafusion_common::tree_node::{ |
51 | | Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion, TreeNodeRewriter, |
52 | | TreeNodeVisitor, |
53 | | }; |
54 | | use datafusion_common::{ |
55 | | internal_err, map_until_stop_and_collect, DataFusionError, Result, |
56 | | }; |
57 | | |
58 | | impl TreeNode for LogicalPlan { |
59 | 0 | fn apply_children<'n, F: FnMut(&'n Self) -> Result<TreeNodeRecursion>>( |
60 | 0 | &'n self, |
61 | 0 | f: F, |
62 | 0 | ) -> Result<TreeNodeRecursion> { |
63 | 0 | self.inputs().into_iter().apply_until_stop(f) |
64 | 0 | } |
65 | | |
66 | | /// Applies `f` to each child (input) of this plan node, rewriting them *in place.* |
67 | | /// |
68 | | /// # Notes |
69 | | /// |
70 | | /// Inputs include ONLY direct children, not the `LogicalPlan`s embedded in |
71 | | /// expressions as subqueries, such as the one in [`Expr::Exists`]. |
72 | | /// |
73 | | /// [`Expr::Exists`]: crate::Expr::Exists |
74 | 0 | fn map_children<F: FnMut(Self) -> Result<Transformed<Self>>>( |
75 | 0 | self, |
76 | 0 | mut f: F, |
77 | 0 | ) -> Result<Transformed<Self>> { |
78 | 0 | Ok(match self { |
79 | | LogicalPlan::Projection(Projection { |
80 | 0 | expr, |
81 | 0 | input, |
82 | 0 | schema, |
83 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
84 | 0 | LogicalPlan::Projection(Projection { |
85 | 0 | expr, |
86 | 0 | input, |
87 | 0 | schema, |
88 | 0 | }) |
89 | 0 | }), |
90 | | LogicalPlan::Filter(Filter { |
91 | 0 | predicate, |
92 | 0 | input, |
93 | 0 | having, |
94 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
95 | 0 | LogicalPlan::Filter(Filter { |
96 | 0 | predicate, |
97 | 0 | input, |
98 | 0 | having, |
99 | 0 | }) |
100 | 0 | }), |
101 | | LogicalPlan::Repartition(Repartition { |
102 | 0 | input, |
103 | 0 | partitioning_scheme, |
104 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
105 | 0 | LogicalPlan::Repartition(Repartition { |
106 | 0 | input, |
107 | 0 | partitioning_scheme, |
108 | 0 | }) |
109 | 0 | }), |
110 | | LogicalPlan::Window(Window { |
111 | 0 | input, |
112 | 0 | window_expr, |
113 | 0 | schema, |
114 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
115 | 0 | LogicalPlan::Window(Window { |
116 | 0 | input, |
117 | 0 | window_expr, |
118 | 0 | schema, |
119 | 0 | }) |
120 | 0 | }), |
121 | | LogicalPlan::Aggregate(Aggregate { |
122 | 0 | input, |
123 | 0 | group_expr, |
124 | 0 | aggr_expr, |
125 | 0 | schema, |
126 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
127 | 0 | LogicalPlan::Aggregate(Aggregate { |
128 | 0 | input, |
129 | 0 | group_expr, |
130 | 0 | aggr_expr, |
131 | 0 | schema, |
132 | 0 | }) |
133 | 0 | }), |
134 | 0 | LogicalPlan::Sort(Sort { expr, input, fetch }) => rewrite_arc(input, f)? |
135 | 0 | .update_data(|input| LogicalPlan::Sort(Sort { expr, input, fetch })), |
136 | | LogicalPlan::Join(Join { |
137 | 0 | left, |
138 | 0 | right, |
139 | 0 | on, |
140 | 0 | filter, |
141 | 0 | join_type, |
142 | 0 | join_constraint, |
143 | 0 | schema, |
144 | 0 | null_equals_null, |
145 | 0 | }) => map_until_stop_and_collect!( |
146 | 0 | rewrite_arc(left, &mut f), |
147 | 0 | right, |
148 | 0 | rewrite_arc(right, &mut f) |
149 | 0 | )? |
150 | 0 | .update_data(|(left, right)| { |
151 | 0 | LogicalPlan::Join(Join { |
152 | 0 | left, |
153 | 0 | right, |
154 | 0 | on, |
155 | 0 | filter, |
156 | 0 | join_type, |
157 | 0 | join_constraint, |
158 | 0 | schema, |
159 | 0 | null_equals_null, |
160 | 0 | }) |
161 | 0 | }), |
162 | | LogicalPlan::CrossJoin(CrossJoin { |
163 | 0 | left, |
164 | 0 | right, |
165 | 0 | schema, |
166 | 0 | }) => map_until_stop_and_collect!( |
167 | 0 | rewrite_arc(left, &mut f), |
168 | 0 | right, |
169 | 0 | rewrite_arc(right, &mut f) |
170 | 0 | )? |
171 | 0 | .update_data(|(left, right)| { |
172 | 0 | LogicalPlan::CrossJoin(CrossJoin { |
173 | 0 | left, |
174 | 0 | right, |
175 | 0 | schema, |
176 | 0 | }) |
177 | 0 | }), |
178 | 0 | LogicalPlan::Limit(Limit { skip, fetch, input }) => rewrite_arc(input, f)? |
179 | 0 | .update_data(|input| LogicalPlan::Limit(Limit { skip, fetch, input })), |
180 | | LogicalPlan::Subquery(Subquery { |
181 | 0 | subquery, |
182 | 0 | outer_ref_columns, |
183 | 0 | }) => rewrite_arc(subquery, f)?.update_data(|subquery| { |
184 | 0 | LogicalPlan::Subquery(Subquery { |
185 | 0 | subquery, |
186 | 0 | outer_ref_columns, |
187 | 0 | }) |
188 | 0 | }), |
189 | | LogicalPlan::SubqueryAlias(SubqueryAlias { |
190 | 0 | input, |
191 | 0 | alias, |
192 | 0 | schema, |
193 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
194 | 0 | LogicalPlan::SubqueryAlias(SubqueryAlias { |
195 | 0 | input, |
196 | 0 | alias, |
197 | 0 | schema, |
198 | 0 | }) |
199 | 0 | }), |
200 | 0 | LogicalPlan::Extension(extension) => rewrite_extension_inputs(extension, f)? |
201 | 0 | .update_data(LogicalPlan::Extension), |
202 | 0 | LogicalPlan::Union(Union { inputs, schema }) => rewrite_arcs(inputs, f)? |
203 | 0 | .update_data(|inputs| LogicalPlan::Union(Union { inputs, schema })), |
204 | 0 | LogicalPlan::Distinct(distinct) => match distinct { |
205 | 0 | Distinct::All(input) => rewrite_arc(input, f)?.update_data(Distinct::All), |
206 | | Distinct::On(DistinctOn { |
207 | 0 | on_expr, |
208 | 0 | select_expr, |
209 | 0 | sort_expr, |
210 | 0 | input, |
211 | 0 | schema, |
212 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
213 | 0 | Distinct::On(DistinctOn { |
214 | 0 | on_expr, |
215 | 0 | select_expr, |
216 | 0 | sort_expr, |
217 | 0 | input, |
218 | 0 | schema, |
219 | 0 | }) |
220 | 0 | }), |
221 | | } |
222 | 0 | .update_data(LogicalPlan::Distinct), |
223 | | LogicalPlan::Explain(Explain { |
224 | 0 | verbose, |
225 | 0 | plan, |
226 | 0 | stringified_plans, |
227 | 0 | schema, |
228 | 0 | logical_optimization_succeeded, |
229 | 0 | }) => rewrite_arc(plan, f)?.update_data(|plan| { |
230 | 0 | LogicalPlan::Explain(Explain { |
231 | 0 | verbose, |
232 | 0 | plan, |
233 | 0 | stringified_plans, |
234 | 0 | schema, |
235 | 0 | logical_optimization_succeeded, |
236 | 0 | }) |
237 | 0 | }), |
238 | | LogicalPlan::Analyze(Analyze { |
239 | 0 | verbose, |
240 | 0 | input, |
241 | 0 | schema, |
242 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
243 | 0 | LogicalPlan::Analyze(Analyze { |
244 | 0 | verbose, |
245 | 0 | input, |
246 | 0 | schema, |
247 | 0 | }) |
248 | 0 | }), |
249 | | LogicalPlan::Dml(DmlStatement { |
250 | 0 | table_name, |
251 | 0 | table_schema, |
252 | 0 | op, |
253 | 0 | input, |
254 | 0 | output_schema, |
255 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
256 | 0 | LogicalPlan::Dml(DmlStatement { |
257 | 0 | table_name, |
258 | 0 | table_schema, |
259 | 0 | op, |
260 | 0 | input, |
261 | 0 | output_schema, |
262 | 0 | }) |
263 | 0 | }), |
264 | | LogicalPlan::Copy(CopyTo { |
265 | 0 | input, |
266 | 0 | output_url, |
267 | 0 | partition_by, |
268 | 0 | file_type, |
269 | 0 | options, |
270 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
271 | 0 | LogicalPlan::Copy(CopyTo { |
272 | 0 | input, |
273 | 0 | output_url, |
274 | 0 | partition_by, |
275 | 0 | file_type, |
276 | 0 | options, |
277 | 0 | }) |
278 | 0 | }), |
279 | 0 | LogicalPlan::Ddl(ddl) => { |
280 | 0 | match ddl { |
281 | | DdlStatement::CreateMemoryTable(CreateMemoryTable { |
282 | 0 | name, |
283 | 0 | constraints, |
284 | 0 | input, |
285 | 0 | if_not_exists, |
286 | 0 | or_replace, |
287 | 0 | column_defaults, |
288 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
289 | 0 | DdlStatement::CreateMemoryTable(CreateMemoryTable { |
290 | 0 | name, |
291 | 0 | constraints, |
292 | 0 | input, |
293 | 0 | if_not_exists, |
294 | 0 | or_replace, |
295 | 0 | column_defaults, |
296 | 0 | }) |
297 | 0 | }), |
298 | | DdlStatement::CreateView(CreateView { |
299 | 0 | name, |
300 | 0 | input, |
301 | 0 | or_replace, |
302 | 0 | definition, |
303 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
304 | 0 | DdlStatement::CreateView(CreateView { |
305 | 0 | name, |
306 | 0 | input, |
307 | 0 | or_replace, |
308 | 0 | definition, |
309 | 0 | }) |
310 | 0 | }), |
311 | | // these statements have no input plans, so `ddl` is returned as is |
312 | | DdlStatement::CreateExternalTable(_) |
313 | | | DdlStatement::CreateCatalogSchema(_) |
314 | | | DdlStatement::CreateCatalog(_) |
315 | | | DdlStatement::CreateIndex(_) |
316 | | | DdlStatement::DropTable(_) |
317 | | | DdlStatement::DropView(_) |
318 | | | DdlStatement::DropCatalogSchema(_) |
319 | | | DdlStatement::CreateFunction(_) |
320 | 0 | | DdlStatement::DropFunction(_) => Transformed::no(ddl), |
321 | | } |
322 | 0 | .update_data(LogicalPlan::Ddl) |
323 | | } |
324 | | LogicalPlan::Unnest(Unnest { |
325 | 0 | input, |
326 | 0 | exec_columns: input_columns, |
327 | 0 | list_type_columns, |
328 | 0 | struct_type_columns, |
329 | 0 | dependency_indices, |
330 | 0 | schema, |
331 | 0 | options, |
332 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
333 | 0 | LogicalPlan::Unnest(Unnest { |
334 | 0 | input, |
335 | 0 | exec_columns: input_columns, |
336 | 0 | dependency_indices, |
337 | 0 | list_type_columns, |
338 | 0 | struct_type_columns, |
339 | 0 | schema, |
340 | 0 | options, |
341 | 0 | }) |
342 | 0 | }), |
343 | | LogicalPlan::Prepare(Prepare { |
344 | 0 | name, |
345 | 0 | data_types, |
346 | 0 | input, |
347 | 0 | }) => rewrite_arc(input, f)?.update_data(|input| { |
348 | 0 | LogicalPlan::Prepare(Prepare { |
349 | 0 | name, |
350 | 0 | data_types, |
351 | 0 | input, |
352 | 0 | }) |
353 | 0 | }), |
354 | | LogicalPlan::RecursiveQuery(RecursiveQuery { |
355 | 0 | name, |
356 | 0 | static_term, |
357 | 0 | recursive_term, |
358 | 0 | is_distinct, |
359 | 0 | }) => map_until_stop_and_collect!( |
360 | 0 | rewrite_arc(static_term, &mut f), |
361 | 0 | recursive_term, |
362 | 0 | rewrite_arc(recursive_term, &mut f) |
363 | 0 | )? |
364 | 0 | .update_data(|(static_term, recursive_term)| { |
365 | 0 | LogicalPlan::RecursiveQuery(RecursiveQuery { |
366 | 0 | name, |
367 | 0 | static_term, |
368 | 0 | recursive_term, |
369 | 0 | is_distinct, |
370 | 0 | }) |
371 | 0 | }), |
372 | | // plans without inputs: `self` is returned as is |
373 | | LogicalPlan::TableScan { .. } |
374 | | | LogicalPlan::Statement { .. } |
375 | | | LogicalPlan::EmptyRelation { .. } |
376 | | | LogicalPlan::Values { .. } |
377 | 0 | | LogicalPlan::DescribeTable(_) => Transformed::no(self), |
378 | | }) |
379 | 0 | } |
380 | | } |
381 | | |
382 | | /// Applies `f` to rewrite an `Arc<LogicalPlan>`, using `Arc::unwrap_or_clone` to avoid copying the plan if possible |
383 | 0 | fn rewrite_arc<F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>>( |
384 | 0 | plan: Arc<LogicalPlan>, |
385 | 0 | mut f: F, |
386 | 0 | ) -> Result<Transformed<Arc<LogicalPlan>>> { |
387 | 0 | f(Arc::unwrap_or_clone(plan))?.map_data(|new_plan| Ok(Arc::new(new_plan))) |
388 | 0 | } |
389 | | |
390 | | /// Rewrites each plan in a `Vec` of `Arc<LogicalPlan>` with `f` (via `rewrite_arc`), without copying if possible |
391 | 0 | fn rewrite_arcs<F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>>( |
392 | 0 | input_plans: Vec<Arc<LogicalPlan>>, |
393 | 0 | mut f: F, |
394 | 0 | ) -> Result<Transformed<Vec<Arc<LogicalPlan>>>> { |
395 | 0 | input_plans |
396 | 0 | .into_iter() |
397 | 0 | .map_until_stop_and_collect(|plan| rewrite_arc(plan, &mut f)) |
398 | 0 | } |
399 | | |
400 | | /// Rewrites all inputs of an `Extension` node with `f`, conceptually "in place" |
401 | | /// (the inputs are currently cloned because there are no APIs for in-place modification) |
402 | | /// |
403 | | /// Should be removed once there is an API for in-place modification of the |
404 | | /// extension node, so that these copies can be avoided |
405 | 0 | fn rewrite_extension_inputs<F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>>( |
406 | 0 | extension: Extension, |
407 | 0 | f: F, |
408 | 0 | ) -> Result<Transformed<Extension>> { |
409 | 0 | let Extension { node } = extension; |
410 | 0 |

411 | 0 | node.inputs() |
412 | 0 | .into_iter() |
413 | 0 | .cloned() |
414 | 0 | .map_until_stop_and_collect(f)? |
415 | 0 | .map_data(|new_inputs| { |
416 | 0 | let exprs = node.expressions(); |
417 | 0 | Ok(Extension { |
418 | 0 | node: node.with_exprs_and_inputs(exprs, new_inputs)?, |
419 | | }) |
420 | 0 | }) |
421 | 0 | } |
422 | | |
423 | | /// Evaluates `$F_DOWN`, then applies `$F_CHILD` to the node's subqueries and to its children, and |
424 | | /// finally `$F_UP`; `transform_children`/`transform_sibling`/`transform_parent` decide continuation. |
425 | | macro_rules! handle_transform_recursion { |
426 | | ($F_DOWN:expr, $F_CHILD:expr, $F_UP:expr) => {{ |
427 | | $F_DOWN? |
428 | 0 | .transform_children(|n| n.map_subqueries($F_CHILD))? |
429 | 0 | .transform_sibling(|n| n.map_children($F_CHILD))? |
430 | | .transform_parent($F_UP) |
431 | | }}; |
432 | | } |
433 | | |
434 | | impl LogicalPlan { |
435 | | /// Calls `f` on all expressions in the current `LogicalPlan` node. |
436 | | /// |
437 | | /// # Notes |
438 | | /// * Similar to [`TreeNode::apply`] but for this node's expressions. |
439 | | /// * Does not include expressions in input `LogicalPlan` nodes |
440 | | /// * Visits only the top level expressions (does not recurse into each expression) |
441 | 0 | pub fn apply_expressions<F: FnMut(&Expr) -> Result<TreeNodeRecursion>>( |
442 | 0 | &self, |
443 | 0 | mut f: F, |
444 | 0 | ) -> Result<TreeNodeRecursion> { |
445 | 0 | match self { |
446 | 0 | LogicalPlan::Projection(Projection { expr, .. }) => { |
447 | 0 | expr.iter().apply_until_stop(f) |
448 | | } |
449 | 0 | LogicalPlan::Values(Values { values, .. }) => values |
450 | 0 | .iter() |
451 | 0 | .apply_until_stop(|value| value.iter().apply_until_stop(&mut f)), |
452 | 0 | LogicalPlan::Filter(Filter { predicate, .. }) => f(predicate), |
453 | | LogicalPlan::Repartition(Repartition { |
454 | 0 | partitioning_scheme, |
455 | 0 | .. |
456 | 0 | }) => match partitioning_scheme { |
457 | 0 | Partitioning::Hash(expr, _) | Partitioning::DistributeBy(expr) => { |
458 | 0 | expr.iter().apply_until_stop(f) |
459 | | } |
460 | 0 | Partitioning::RoundRobinBatch(_) => Ok(TreeNodeRecursion::Continue), |
461 | | }, |
462 | 0 | LogicalPlan::Window(Window { window_expr, .. }) => { |
463 | 0 | window_expr.iter().apply_until_stop(f) |
464 | | } |
465 | | LogicalPlan::Aggregate(Aggregate { |
466 | 0 | group_expr, |
467 | 0 | aggr_expr, |
468 | 0 | .. |
469 | 0 | }) => group_expr |
470 | 0 | .iter() |
471 | 0 | .chain(aggr_expr.iter()) |
472 | 0 | .apply_until_stop(f), |
473 | | // A join has two kinds of expressions: equijoin (`on`) and non-equijoin (`filter`). |
474 | | // 1. the first part is `on.len()` equijoin expressions, each visited in the form `left-on = right-on`. |
475 | | // 2. the second part is the non-equijoin `filter`. |
476 | 0 | LogicalPlan::Join(Join { on, filter, .. }) => { |
477 | 0 | on.iter() |
478 | 0 | // TODO: why do we need to create an `Expr::eq`? Cloning `Expr` is costly... |
479 | 0 | // it is not ideal to create an expr here just to visit it; it could be cached on the Join itself |
480 | 0 | .map(|(l, r)| Expr::eq(l.clone(), r.clone())) |
481 | 0 | .apply_until_stop(|e| f(&e))? |
482 | 0 | .visit_sibling(|| filter.iter().apply_until_stop(f)) |
483 | | } |
484 | 0 | LogicalPlan::Sort(Sort { expr, .. }) => { |
485 | 0 | expr.iter().apply_until_stop(|sort| f(&sort.expr)) |
486 | | } |
487 | 0 | LogicalPlan::Extension(extension) => { |
488 | 0 | // it would be nice to avoid this copy -- maybe the |
489 | 0 | // extension API could be updated to just observe Exprs |
490 | 0 | extension.node.expressions().iter().apply_until_stop(f) |
491 | | } |
492 | 0 | LogicalPlan::TableScan(TableScan { filters, .. }) => { |
493 | 0 | filters.iter().apply_until_stop(f) |
494 | | } |
495 | 0 | LogicalPlan::Unnest(unnest) => { |
496 | 0 | let columns = unnest.exec_columns.clone(); |
497 | 0 |

498 | 0 | let exprs = columns |
499 | 0 | .iter() |
500 | 0 | .map(|(c, _)| Expr::Column(c.clone())) |
501 | 0 | .collect::<Vec<_>>(); |
502 | 0 | exprs.iter().apply_until_stop(f) |
503 | | } |
504 | | LogicalPlan::Distinct(Distinct::On(DistinctOn { |
505 | 0 | on_expr, |
506 | 0 | select_expr, |
507 | 0 | sort_expr, |
508 | 0 | .. |
509 | 0 | })) => on_expr |
510 | 0 | .iter() |
511 | 0 | .chain(select_expr.iter()) |
512 | 0 | .chain(sort_expr.iter().flatten().map(|sort| &sort.expr)) |
513 | 0 | .apply_until_stop(f), |
514 | | // plans without expressions |
515 | | LogicalPlan::EmptyRelation(_) |
516 | | | LogicalPlan::RecursiveQuery(_) |
517 | | | LogicalPlan::Subquery(_) |
518 | | | LogicalPlan::SubqueryAlias(_) |
519 | | | LogicalPlan::Limit(_) |
520 | | | LogicalPlan::Statement(_) |
521 | | | LogicalPlan::CrossJoin(_) |
522 | | | LogicalPlan::Analyze(_) |
523 | | | LogicalPlan::Explain(_) |
524 | | | LogicalPlan::Union(_) |
525 | | | LogicalPlan::Distinct(Distinct::All(_)) |
526 | | | LogicalPlan::Dml(_) |
527 | | | LogicalPlan::Ddl(_) |
528 | | | LogicalPlan::Copy(_) |
529 | | | LogicalPlan::DescribeTable(_) |
530 | 0 | | LogicalPlan::Prepare(_) => Ok(TreeNodeRecursion::Continue), |
531 | | } |
532 | 0 | } |
533 | | |
534 | | /// Rewrites all expressions in the current `LogicalPlan` node using `f`. |
535 | | /// |
536 | | /// Returns the (possibly rewritten) current node wrapped in [`Transformed`]. |
537 | | /// |
538 | | /// # Notes |
539 | | /// * Similar to [`TreeNode::map_children`] but for this node's expressions. |
540 | | /// * Visits only the top level expressions (does not recurse into each expression) |
541 | 0 | pub fn map_expressions<F: FnMut(Expr) -> Result<Transformed<Expr>>>( |
542 | 0 | self, |
543 | 0 | mut f: F, |
544 | 0 | ) -> Result<Transformed<Self>> { |
545 | 0 | Ok(match self { |
546 | | LogicalPlan::Projection(Projection { |
547 | 0 | expr, |
548 | 0 | input, |
549 | 0 | schema, |
550 | 0 | }) => expr |
551 | 0 | .into_iter() |
552 | 0 | .map_until_stop_and_collect(f)? |
553 | 0 | .update_data(|expr| { |
554 | 0 | LogicalPlan::Projection(Projection { |
555 | 0 | expr, |
556 | 0 | input, |
557 | 0 | schema, |
558 | 0 | }) |
559 | 0 | }), |
560 | 0 | LogicalPlan::Values(Values { schema, values }) => values |
561 | 0 | .into_iter() |
562 | 0 | .map_until_stop_and_collect(|value| { |
563 | 0 | value.into_iter().map_until_stop_and_collect(&mut f) |
564 | 0 | })? |
565 | 0 | .update_data(|values| LogicalPlan::Values(Values { schema, values })), |
566 | | LogicalPlan::Filter(Filter { |
567 | 0 | predicate, |
568 | 0 | input, |
569 | 0 | having, |
570 | 0 | }) => f(predicate)?.update_data(|predicate| { |
571 | 0 | LogicalPlan::Filter(Filter { |
572 | 0 | predicate, |
573 | 0 | input, |
574 | 0 | having, |
575 | 0 | }) |
576 | 0 | }), |
577 | | LogicalPlan::Repartition(Repartition { |
578 | 0 | input, |
579 | 0 | partitioning_scheme, |
580 | 0 | }) => match partitioning_scheme { |
581 | 0 | Partitioning::Hash(expr, usize) => expr |
582 | 0 | .into_iter() |
583 | 0 | .map_until_stop_and_collect(f)? |
584 | 0 | .update_data(|expr| Partitioning::Hash(expr, usize)), |
585 | 0 | Partitioning::DistributeBy(expr) => expr |
586 | 0 | .into_iter() |
587 | 0 | .map_until_stop_and_collect(f)? |
588 | 0 | .update_data(Partitioning::DistributeBy), |
589 | 0 | Partitioning::RoundRobinBatch(_) => Transformed::no(partitioning_scheme), |
590 | | } |
591 | 0 | .update_data(|partitioning_scheme| { |
592 | 0 | LogicalPlan::Repartition(Repartition { |
593 | 0 | input, |
594 | 0 | partitioning_scheme, |
595 | 0 | }) |
596 | 0 | }), |
597 | | LogicalPlan::Window(Window { |
598 | 0 | input, |
599 | 0 | window_expr, |
600 | 0 | schema, |
601 | 0 | }) => window_expr |
602 | 0 | .into_iter() |
603 | 0 | .map_until_stop_and_collect(f)? |
604 | 0 | .update_data(|window_expr| { |
605 | 0 | LogicalPlan::Window(Window { |
606 | 0 | input, |
607 | 0 | window_expr, |
608 | 0 | schema, |
609 | 0 | }) |
610 | 0 | }), |
611 | | LogicalPlan::Aggregate(Aggregate { |
612 | 0 | input, |
613 | 0 | group_expr, |
614 | 0 | aggr_expr, |
615 | 0 | schema, |
616 | 0 | }) => map_until_stop_and_collect!( |
617 | 0 | group_expr.into_iter().map_until_stop_and_collect(&mut f), |
618 | 0 | aggr_expr, |
619 | 0 | aggr_expr.into_iter().map_until_stop_and_collect(&mut f) |
620 | 0 | )? |
621 | 0 | .update_data(|(group_expr, aggr_expr)| { |
622 | 0 | LogicalPlan::Aggregate(Aggregate { |
623 | 0 | input, |
624 | 0 | group_expr, |
625 | 0 | aggr_expr, |
626 | 0 | schema, |
627 | 0 | }) |
628 | 0 | }), |
629 | | |
630 | | // A join has two kinds of expressions: equijoin (`on`) and non-equijoin (`filter`). |
631 | | // 1. the first part is the `on.len()` equijoin pairs; `f` is applied to each side of every pair. |
632 | | // 2. the second part is the optional non-equijoin `filter`. |
633 | | LogicalPlan::Join(Join { |
634 | 0 | left, |
635 | 0 | right, |
636 | 0 | on, |
637 | 0 | filter, |
638 | 0 | join_type, |
639 | 0 | join_constraint, |
640 | 0 | schema, |
641 | 0 | null_equals_null, |
642 | 0 | }) => map_until_stop_and_collect!( |
643 | 0 | on.into_iter().map_until_stop_and_collect( |
644 | 0 | |on| map_until_stop_and_collect!(f(on.0), on.1, f(on.1)) |
645 | 0 | ), |
646 | 0 | filter, |
647 | 0 | filter.map_or(Ok::<_, DataFusionError>(Transformed::no(None)), |e| { |
648 | 0 | Ok(f(e)?.update_data(Some)) |
649 | 0 | }) |
650 | 0 | )? |
651 | 0 | .update_data(|(on, filter)| { |
652 | 0 | LogicalPlan::Join(Join { |
653 | 0 | left, |
654 | 0 | right, |
655 | 0 | on, |
656 | 0 | filter, |
657 | 0 | join_type, |
658 | 0 | join_constraint, |
659 | 0 | schema, |
660 | 0 | null_equals_null, |
661 | 0 | }) |
662 | 0 | }), |
663 | 0 | LogicalPlan::Sort(Sort { expr, input, fetch }) => { |
664 | 0 | transform_sort_vec(expr, &mut f)? |
665 | 0 | .update_data(|expr| LogicalPlan::Sort(Sort { expr, input, fetch })) |
666 | | } |
667 | 0 | LogicalPlan::Extension(Extension { node }) => { |
668 | | // it would be nice to avoid this copy -- maybe the |
669 | | // extension API could be updated to just observe Exprs |
670 | 0 | let exprs = node |
671 | 0 | .expressions() |
672 | 0 | .into_iter() |
673 | 0 | .map_until_stop_and_collect(f)?; |
674 | 0 | let plan = LogicalPlan::Extension(Extension { |
675 | 0 | node: UserDefinedLogicalNode::with_exprs_and_inputs( |
676 | 0 | node.as_ref(), |
677 | 0 | exprs.data, |
678 | 0 | node.inputs().into_iter().cloned().collect::<Vec<_>>(), |
679 | 0 | )?, |
680 | | }); |
681 | 0 | Transformed::new(plan, exprs.transformed, exprs.tnr) |
682 | | } |
683 | | LogicalPlan::TableScan(TableScan { |
684 | 0 | table_name, |
685 | 0 | source, |
686 | 0 | projection, |
687 | 0 | projected_schema, |
688 | 0 | filters, |
689 | 0 | fetch, |
690 | 0 | }) => filters |
691 | 0 | .into_iter() |
692 | 0 | .map_until_stop_and_collect(f)? |
693 | 0 | .update_data(|filters| { |
694 | 0 | LogicalPlan::TableScan(TableScan { |
695 | 0 | table_name, |
696 | 0 | source, |
697 | 0 | projection, |
698 | 0 | projected_schema, |
699 | 0 | filters, |
700 | 0 | fetch, |
701 | 0 | }) |
702 | 0 | }), |
703 | | LogicalPlan::Distinct(Distinct::On(DistinctOn { |
704 | 0 | on_expr, |
705 | 0 | select_expr, |
706 | 0 | sort_expr, |
707 | 0 | input, |
708 | 0 | schema, |
709 | 0 | })) => map_until_stop_and_collect!( |
710 | 0 | on_expr.into_iter().map_until_stop_and_collect(&mut f), |
711 | 0 | select_expr, |
712 | 0 | select_expr.into_iter().map_until_stop_and_collect(&mut f), |
713 | 0 | sort_expr, |
714 | 0 | transform_sort_option_vec(sort_expr, &mut f) |
715 | 0 | )? |
716 | 0 | .update_data(|(on_expr, select_expr, sort_expr)| { |
717 | 0 | LogicalPlan::Distinct(Distinct::On(DistinctOn { |
718 | 0 | on_expr, |
719 | 0 | select_expr, |
720 | 0 | sort_expr, |
721 | 0 | input, |
722 | 0 | schema, |
723 | 0 | })) |
724 | 0 | }), |
725 | | // plans without expressions (NOTE(review): `Unnest` is listed here although `apply_expressions` visits its columns) |
726 | | LogicalPlan::EmptyRelation(_) |
727 | | | LogicalPlan::Unnest(_) |
728 | | | LogicalPlan::RecursiveQuery(_) |
729 | | | LogicalPlan::Subquery(_) |
730 | | | LogicalPlan::SubqueryAlias(_) |
731 | | | LogicalPlan::Limit(_) |
732 | | | LogicalPlan::Statement(_) |
733 | | | LogicalPlan::CrossJoin(_) |
734 | | | LogicalPlan::Analyze(_) |
735 | | | LogicalPlan::Explain(_) |
736 | | | LogicalPlan::Union(_) |
737 | | | LogicalPlan::Distinct(Distinct::All(_)) |
738 | | | LogicalPlan::Dml(_) |
739 | | | LogicalPlan::Ddl(_) |
740 | | | LogicalPlan::Copy(_) |
741 | | | LogicalPlan::DescribeTable(_) |
742 | 0 | | LogicalPlan::Prepare(_) => Transformed::no(self), |
743 | | }) |
744 | 0 | } |
745 | | |
746 | | /// Visits this plan with `visitor` similarly to [`Self::visit`], also descending into subqueries that |
747 | | /// may appear in expressions such as `IN (SELECT ...)` (subqueries are visited before inputs). |
748 | 0 | pub fn visit_with_subqueries<V: for<'n> TreeNodeVisitor<'n, Node = Self>>( |
749 | 0 | &self, |
750 | 0 | visitor: &mut V, |
751 | 0 | ) -> Result<TreeNodeRecursion> { |
752 | 0 | visitor |
753 | 0 | .f_down(self)? |
754 | 0 | .visit_children(|| { |
755 | 0 | self.apply_subqueries(|c| c.visit_with_subqueries(visitor)) |
756 | 0 | })? |
757 | 0 | .visit_sibling(|| self.apply_children(|c| c.visit_with_subqueries(visitor)))? |
758 | 0 | .visit_parent(|| visitor.f_up(self)) |
759 | 0 | } |
760 | | |
761 | | /// Similarly to [`Self::rewrite`], rewrites this node and its inputs using `rewriter`, |
762 | | /// including subqueries that may appear in expressions such as `IN (SELECT |
763 | | /// ...)`. |
764 | 0 | pub fn rewrite_with_subqueries<R: TreeNodeRewriter<Node = Self>>( |
765 | 0 | self, |
766 | 0 | rewriter: &mut R, |
767 | 0 | ) -> Result<Transformed<Self>> { |
768 | 0 | handle_transform_recursion!( |
769 | 0 | rewriter.f_down(self), |
770 | 0 | |c| c.rewrite_with_subqueries(rewriter), |
771 | 0 | |n| rewriter.f_up(n) |
772 | 0 | ) |
773 | 0 | } |
774 | | |
775 | | /// Similarly to [`Self::apply`], calls `f` on this node, then recursively on the |
776 | | /// subqueries that may appear in its expressions (such as `IN (SELECT ...)`), |
777 | | /// and then on its inputs. |
778 | 0 | pub fn apply_with_subqueries<F: FnMut(&Self) -> Result<TreeNodeRecursion>>( |
779 | 0 | &self, |
780 | 0 | mut f: F, |
781 | 0 | ) -> Result<TreeNodeRecursion> { |
782 | 0 | fn apply_with_subqueries_impl< |
783 | 0 | F: FnMut(&LogicalPlan) -> Result<TreeNodeRecursion>, |
784 | 0 | >( |
785 | 0 | node: &LogicalPlan, |
786 | 0 | f: &mut F, |
787 | 0 | ) -> Result<TreeNodeRecursion> { |
788 | 0 | f(node)? |
789 | 0 | .visit_children(|| { |
790 | 0 | node.apply_subqueries(|c| apply_with_subqueries_impl(c, f)) |
791 | 0 | })? |
792 | 0 | .visit_sibling(|| { |
793 | 0 | node.apply_children(|c| apply_with_subqueries_impl(c, f)) |
794 | 0 | }) |
795 | 0 | } |
796 | | |
797 | 0 | apply_with_subqueries_impl(self, &mut f) |
798 | 0 | } |
799 | | |
800 | | /// Similarly to [`Self::transform`], rewrites this node and its inputs using `f`, |
801 | | /// including subqueries that may appear in expressions such as `IN (SELECT ...)`. |
802 | | /// Delegates to [`Self::transform_up_with_subqueries`]. |
803 | 0 | pub fn transform_with_subqueries<F: FnMut(Self) -> Result<Transformed<Self>>>( |
804 | 0 | self, |
805 | 0 | f: F, |
806 | 0 | ) -> Result<Transformed<Self>> { |
807 | 0 | self.transform_up_with_subqueries(f) |
808 | 0 | } |
809 | | |
810 | | /// Similarly to [`Self::transform_down`], rewrites this node and its inputs using `f`, |
811 | | /// including subqueries that may appear in expressions such as `IN (SELECT ...)`; |
812 | | /// `f` is applied to a node before its subqueries and inputs. |
813 | 0 | pub fn transform_down_with_subqueries<F: FnMut(Self) -> Result<Transformed<Self>>>( |
814 | 0 | self, |
815 | 0 | mut f: F, |
816 | 0 | ) -> Result<Transformed<Self>> { |
817 | 0 | fn transform_down_with_subqueries_impl< |
818 | 0 | F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>, |
819 | 0 | >( |
820 | 0 | node: LogicalPlan, |
821 | 0 | f: &mut F, |
822 | 0 | ) -> Result<Transformed<LogicalPlan>> { |
823 | 0 | f(node)? |
824 | 0 | .transform_children(|n| { |
825 | 0 | n.map_subqueries(|c| transform_down_with_subqueries_impl(c, f)) |
826 | 0 | })? |
827 | 0 | .transform_sibling(|n| { |
828 | 0 | n.map_children(|c| transform_down_with_subqueries_impl(c, f)) |
829 | 0 | }) |
830 | 0 | } |
831 | | |
832 | 0 | transform_down_with_subqueries_impl(self, &mut f) |
833 | 0 | } |
834 | | |
835 | | /// Similarly to [`Self::transform_up`], rewrites this node and its inputs using `f`, |
836 | | /// including subqueries that may appear in expressions such as `IN (SELECT ...)`; |
837 | | /// `f` is applied to a node after its subqueries and inputs. |
838 | 0 | pub fn transform_up_with_subqueries<F: FnMut(Self) -> Result<Transformed<Self>>>( |
839 | 0 | self, |
840 | 0 | mut f: F, |
841 | 0 | ) -> Result<Transformed<Self>> { |
842 | 0 | fn transform_up_with_subqueries_impl< |
843 | 0 | F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>, |
844 | 0 | >( |
845 | 0 | node: LogicalPlan, |
846 | 0 | f: &mut F, |
847 | 0 | ) -> Result<Transformed<LogicalPlan>> { |
848 | 0 | node.map_subqueries(|c| transform_up_with_subqueries_impl(c, f))? |
849 | 0 | .transform_sibling(|n| { |
850 | 0 | n.map_children(|c| transform_up_with_subqueries_impl(c, f)) |
851 | 0 | })? |
852 | 0 | .transform_parent(f) |
853 | 0 | } |
854 | | |
855 | 0 | transform_up_with_subqueries_impl(self, &mut f) |
856 | 0 | } |
857 | | |
858 | | /// Similarly to [`Self::transform_down_up`], rewrites this node and its inputs using |
859 | | /// `f_down` (before descending) and `f_up` (afterwards), including subqueries that may |
860 | | /// appear in expressions such as `IN (SELECT ...)`. |
861 | 0 | pub fn transform_down_up_with_subqueries< |
862 | 0 | FD: FnMut(Self) -> Result<Transformed<Self>>, |
863 | 0 | FU: FnMut(Self) -> Result<Transformed<Self>>, |
864 | 0 | >( |
865 | 0 | self, |
866 | 0 | mut f_down: FD, |
867 | 0 | mut f_up: FU, |
868 | 0 | ) -> Result<Transformed<Self>> { |
869 | 0 | fn transform_down_up_with_subqueries_impl< |
870 | 0 | FD: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>, |
871 | 0 | FU: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>, |
872 | 0 | >( |
873 | 0 | node: LogicalPlan, |
874 | 0 | f_down: &mut FD, |
875 | 0 | f_up: &mut FU, |
876 | 0 | ) -> Result<Transformed<LogicalPlan>> { |
877 | 0 | handle_transform_recursion!( |
878 | 0 | f_down(node), |
879 | 0 | |c| transform_down_up_with_subqueries_impl(c, f_down, f_up), |
880 | 0 | f_up |
881 | | ) |
882 | 0 | } |
883 | | |
884 | 0 | transform_down_up_with_subqueries_impl(self, &mut f_down, &mut f_up) |
885 | 0 | } |
886 | | |
887 | | /// Similarly to [`Self::apply_children`], but calls `f` on the subqueries that may |
888 | | /// appear in this node's expressions, such as `IN (SELECT ...)`, each passed to `f` |
889 | | /// wrapped in a `LogicalPlan::Subquery`. |
890 | 0 | pub fn apply_subqueries<F: FnMut(&Self) -> Result<TreeNodeRecursion>>( |
891 | 0 | &self, |
892 | 0 | mut f: F, |
893 | 0 | ) -> Result<TreeNodeRecursion> { |
894 | 0 | self.apply_expressions(|expr| { |
895 | 0 | expr.apply(|expr| match expr { |
896 | 0 | Expr::Exists(Exists { subquery, .. }) |
897 | 0 | | Expr::InSubquery(InSubquery { subquery, .. }) |
898 | 0 | | Expr::ScalarSubquery(subquery) => { |
899 | | // use a synthetic plan so the collector sees a |
900 | | // LogicalPlan::Subquery (even though it is |
901 | | // actually a Subquery alias) |
902 | 0 | f(&LogicalPlan::Subquery(subquery.clone())) |
903 | | } |
904 | 0 | _ => Ok(TreeNodeRecursion::Continue), |
905 | 0 | }) |
906 | 0 | }) |
907 | 0 | } |
908 | | |
909 | | /// Similarly to [`Self::map_children`], rewrites all subqueries that may |
910 | | /// appear in expressions such as `IN (SELECT ...)` using `f`. |
911 | | /// |
912 | | /// Returns the current node; `f` must return a `LogicalPlan::Subquery`, otherwise `internal_err!` is used. |
913 | 0 | pub fn map_subqueries<F: FnMut(Self) -> Result<Transformed<Self>>>( |
914 | 0 | self, |
915 | 0 | mut f: F, |
916 | 0 | ) -> Result<Transformed<Self>> { |
917 | 0 | self.map_expressions(|expr| { |
918 | 0 | expr.transform_down(|expr| match expr { |
919 | 0 | Expr::Exists(Exists { subquery, negated }) => { |
920 | 0 | f(LogicalPlan::Subquery(subquery))?.map_data(|s| match s { |
921 | 0 | LogicalPlan::Subquery(subquery) => { |
922 | 0 | Ok(Expr::Exists(Exists { subquery, negated })) |
923 | | } |
924 | 0 | _ => internal_err!("Transformation should return Subquery"), |
925 | 0 | }) |
926 | | } |
927 | | Expr::InSubquery(InSubquery { |
928 | 0 | expr, |
929 | 0 | subquery, |
930 | 0 | negated, |
931 | 0 | }) => f(LogicalPlan::Subquery(subquery))?.map_data(|s| match s { |
932 | 0 | LogicalPlan::Subquery(subquery) => Ok(Expr::InSubquery(InSubquery { |
933 | 0 | expr, |
934 | 0 | subquery, |
935 | 0 | negated, |
936 | 0 | })), |
937 | 0 | _ => internal_err!("Transformation should return Subquery"), |
938 | 0 | }), |
939 | 0 | Expr::ScalarSubquery(subquery) => f(LogicalPlan::Subquery(subquery))? |
940 | 0 | .map_data(|s| match s { |
941 | 0 | LogicalPlan::Subquery(subquery) => { |
942 | 0 | Ok(Expr::ScalarSubquery(subquery)) |
943 | | } |
944 | 0 | _ => internal_err!("Transformation should return Subquery"), |
945 | 0 | }), |
946 | 0 | _ => Ok(Transformed::no(expr)), |
947 | 0 | }) |
948 | 0 | }) |
949 | 0 | } |
950 | | } |