/Users/andrewlamb/Software/datafusion/datafusion/expr/src/planner.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! [`ContextProvider`] and [`ExprPlanner`] APIs to customize SQL query planning |
19 | | |
20 | | use std::fmt::Debug; |
21 | | use std::sync::Arc; |
22 | | |
23 | | use arrow::datatypes::{DataType, Field, SchemaRef}; |
24 | | use datafusion_common::{ |
25 | | config::ConfigOptions, file_options::file_type::FileType, not_impl_err, DFSchema, |
26 | | Result, TableReference, |
27 | | }; |
28 | | |
29 | | use crate::{AggregateUDF, Expr, GetFieldAccess, ScalarUDF, TableSource, WindowUDF}; |
30 | | |
31 | | /// Provides the SQL query planner with metadata about tables and |
32 | | /// functions referenced in SQL statements, without a direct dependency on other |
33 | | /// DataFusion structures. |
34 | | pub trait ContextProvider { |
35 | | /// Getter for the [`TableSource`] of the table referenced by `name` |
36 | | fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>>; |
37 | | |
38 | 0 | fn get_file_type(&self, _ext: &str) -> Result<Arc<dyn FileType>> { |
39 | 0 | not_impl_err!("Registered file types are not supported") |
40 | 0 | } |
41 | | |
42 | | /// Getter for a table function; the default implementation returns a `not_impl_err!` error |
43 | 0 | fn get_table_function_source( |
44 | 0 | &self, |
45 | 0 | _name: &str, |
46 | 0 | _args: Vec<Expr>, |
47 | 0 | ) -> Result<Arc<dyn TableSource>> { |
48 | 0 | not_impl_err!("Table Functions are not supported") |
49 | 0 | } |
50 | | |
51 | | /// Provides a worktable (an intermediate table that is used to store the results of a CTE during execution). |
52 | | /// We don't directly implement this in the logical plan's `SqlToRel` |
53 | | /// because the sql code needs access to a table that contains execution-related types that can't be a direct dependency |
54 | | /// of the sql crate (namely, the `CteWorktable`). |
55 | | /// The [`ContextProvider`] provides a way to "hide" this dependency. |
56 | 0 | fn create_cte_work_table( |
57 | 0 | &self, |
58 | 0 | _name: &str, |
59 | 0 | _schema: SchemaRef, |
60 | 0 | ) -> Result<Arc<dyn TableSource>> { |
61 | 0 | not_impl_err!("Recursive CTE is not implemented") |
62 | 0 | } |
63 | | |
64 | | /// Getter for the [`ExprPlanner`]s; the default implementation returns an empty slice |
65 | 0 | fn get_expr_planners(&self) -> &[Arc<dyn ExprPlanner>] { |
66 | 0 | &[] |
67 | 0 | } |
68 | | |
69 | | /// Getter for a UDF description |
70 | | fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>>; |
71 | | /// Getter for a UDAF description |
72 | | fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>>; |
73 | | /// Getter for a UDWF description |
74 | | fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>>; |
75 | | /// Getter for the [`DataType`] of a system/user-defined variable |
76 | | fn get_variable_type(&self, variable_names: &[String]) -> Option<DataType>; |
77 | | |
78 | | /// Get configuration options |
79 | | fn options(&self) -> &ConfigOptions; |
80 | | |
81 | | /// Get all user defined scalar function names |
82 | | fn udf_names(&self) -> Vec<String>; |
83 | | |
84 | | /// Get all user defined aggregate function names |
85 | | fn udaf_names(&self) -> Vec<String>; |
86 | | |
87 | | /// Get all user defined window function names |
88 | | fn udwf_names(&self) -> Vec<String>; |
89 | | } |
90 | | |
91 | | /// This trait allows users to customize the behavior of the SQL planner |
92 | | pub trait ExprPlanner: Debug + Send + Sync { |
93 | | /// Plan the binary operation between two expressions |
94 | | /// Returns the original [`RawBinaryExpr`] if not possible |
95 | 0 | fn plan_binary_op( |
96 | 0 | &self, |
97 | 0 | expr: RawBinaryExpr, |
98 | 0 | _schema: &DFSchema, |
99 | 0 | ) -> Result<PlannerResult<RawBinaryExpr>> { |
100 | 0 | Ok(PlannerResult::Original(expr)) |
101 | 0 | } |
102 | | |
103 | | /// Plan the field access expression |
104 | | /// |
105 | | /// Returns the original [`RawFieldAccessExpr`] if not possible |
106 | 0 | fn plan_field_access( |
107 | 0 | &self, |
108 | 0 | expr: RawFieldAccessExpr, |
109 | 0 | _schema: &DFSchema, |
110 | 0 | ) -> Result<PlannerResult<RawFieldAccessExpr>> { |
111 | 0 | Ok(PlannerResult::Original(expr)) |
112 | 0 | } |
113 | | |
114 | | /// Plan an array literal from its element expressions |
115 | | /// |
116 | | /// Returns the original expression arguments if not possible |
117 | 0 | fn plan_array_literal( |
118 | 0 | &self, |
119 | 0 | exprs: Vec<Expr>, |
120 | 0 | _schema: &DFSchema, |
121 | 0 | ) -> Result<PlannerResult<Vec<Expr>>> { |
122 | 0 | Ok(PlannerResult::Original(exprs)) |
123 | 0 | } |
124 | | |
125 | | /// Plan the POSITION expression, e.g., `POSITION(<expr> in <expr>)`. |
126 | | /// Returns the original expression arguments if not possible |
127 | 0 | fn plan_position(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { |
128 | 0 | Ok(PlannerResult::Original(args)) |
129 | 0 | } |
130 | | |
131 | | /// Plan the dictionary literal `{ key: value, ...}` |
132 | | /// |
133 | | /// Returns the original [`RawDictionaryExpr`] if not possible |
134 | 0 | fn plan_dictionary_literal( |
135 | 0 | &self, |
136 | 0 | expr: RawDictionaryExpr, |
137 | 0 | _schema: &DFSchema, |
138 | 0 | ) -> Result<PlannerResult<RawDictionaryExpr>> { |
139 | 0 | Ok(PlannerResult::Original(expr)) |
140 | 0 | } |
141 | | |
142 | | /// Plan an extract expression, e.g., `EXTRACT(month FROM foo)` |
143 | | /// |
144 | | /// Returns the original expression arguments if not possible |
145 | 0 | fn plan_extract(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { |
146 | 0 | Ok(PlannerResult::Original(args)) |
147 | 0 | } |
148 | | |
149 | | /// Plan a substring expression, e.g., `SUBSTRING(<expr> [FROM <expr>] [FOR <expr>])` |
150 | | /// |
151 | | /// Returns the original expression arguments if not possible |
152 | 0 | fn plan_substring(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { |
153 | 0 | Ok(PlannerResult::Original(args)) |
154 | 0 | } |
155 | | |
156 | | /// Plans a struct `struct(expression1[, ..., expression_n])` |
157 | | /// literal based on the given input expressions. |
158 | | /// This function takes a vector of expressions and a boolean flag indicating whether |
159 | | /// the struct uses the optional name. |
160 | | /// |
161 | | /// Returns a [`PlannerResult`] containing either the planned struct expression or the original |
162 | | /// input expressions if planning is not possible. |
163 | 0 | fn plan_struct_literal( |
164 | 0 | &self, |
165 | 0 | args: Vec<Expr>, |
166 | 0 | _is_named_struct: bool, |
167 | 0 | ) -> Result<PlannerResult<Vec<Expr>>> { |
168 | 0 | Ok(PlannerResult::Original(args)) |
169 | 0 | } |
170 | | |
171 | | /// Plans an overlay expression, e.g., `overlay(str PLACING substr FROM pos [FOR count])` |
172 | | /// |
173 | | /// Returns the original expression arguments if not possible |
174 | 0 | fn plan_overlay(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { |
175 | 0 | Ok(PlannerResult::Original(args)) |
176 | 0 | } |
177 | | |
178 | | /// Plan a make_map expression, e.g., `make_map(key1, value1, key2, value2, ...)` |
179 | | /// |
180 | | /// Returns the original expression arguments if not possible |
181 | 0 | fn plan_make_map(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { |
182 | 0 | Ok(PlannerResult::Original(args)) |
183 | 0 | } |
184 | | |
185 | | /// Plans a compound identifier, e.g., `db.schema.table`, for non-empty nested names |
186 | | /// |
187 | | /// Note: |
188 | | /// Currently compound identifier for outer query schema is not supported. |
189 | | /// |
190 | | /// Returns the planned expression; the default implementation returns a `not_impl_err!` error |
191 | 0 | fn plan_compound_identifier( |
192 | 0 | &self, |
193 | 0 | _field: &Field, |
194 | 0 | _qualifier: Option<&TableReference>, |
195 | 0 | _nested_names: &[String], |
196 | 0 | ) -> Result<PlannerResult<Vec<Expr>>> { |
197 | 0 | not_impl_err!( |
198 | 0 | "Default planner compound identifier hasn't been implemented for ExprPlanner" |
199 | 0 | ) |
200 | 0 | } |
201 | | |
202 | | /// Plans `ANY` expression, e.g., `expr = ANY(array_expr)` |
203 | | /// |
204 | | /// Returns the original [`RawBinaryExpr`] if not possible |
205 | 0 | fn plan_any(&self, expr: RawBinaryExpr) -> Result<PlannerResult<RawBinaryExpr>> { |
206 | 0 | Ok(PlannerResult::Original(expr)) |
207 | 0 | } |
208 | | } |
209 | | |
210 | | /// An operator with two arguments to plan |
211 | | /// |
212 | | /// Note `left` and `right` are DataFusion [`Expr`]s but the `op` is the SQL AST |
213 | | /// operator ([`sqlparser::ast::BinaryOperator`]). |
214 | | /// |
215 | | /// This structure is used by [`ExprPlanner`] to plan operators with |
216 | | /// custom expressions. |
217 | | #[derive(Debug, Clone)] |
218 | | pub struct RawBinaryExpr { |
219 | | pub op: sqlparser::ast::BinaryOperator, |
220 | | pub left: Expr, |
221 | | pub right: Expr, |
222 | | } |
223 | | |
224 | | /// An expression with a [`GetFieldAccess`] to plan |
225 | | /// |
226 | | /// This structure is used by [`ExprPlanner`] to plan field accesses with |
227 | | /// custom expressions. |
228 | | #[derive(Debug, Clone)] |
229 | | pub struct RawFieldAccessExpr { |
230 | | pub field_access: GetFieldAccess, |
231 | | pub expr: Expr, |
232 | | } |
233 | | |
234 | | /// A dictionary literal expression `{ key: value, ...}` |
235 | | /// |
236 | | /// This structure is used by [`ExprPlanner`] to plan dictionary literals with |
237 | | /// custom expressions. |
238 | | #[derive(Debug, Clone)] |
239 | | pub struct RawDictionaryExpr { |
240 | | pub keys: Vec<Expr>, |
241 | | pub values: Vec<Expr>, |
242 | | } |
243 | | |
244 | | /// Result of planning a raw expression with an [`ExprPlanner`] |
245 | | #[derive(Debug, Clone)] |
246 | | pub enum PlannerResult<T> { |
247 | | /// The raw expression was successfully planned as a new [`Expr`] |
248 | | Planned(Expr), |
249 | | /// The raw expression `T` could not be planned, and is returned unmodified |
250 | | Original(T), |
251 | | } |