Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/expr/src/planner.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! [`ContextProvider`] and [`ExprPlanner`] APIs to customize SQL query planning
19
20
use std::fmt::Debug;
21
use std::sync::Arc;
22
23
use arrow::datatypes::{DataType, Field, SchemaRef};
24
use datafusion_common::{
25
    config::ConfigOptions, file_options::file_type::FileType, not_impl_err, DFSchema,
26
    Result, TableReference,
27
};
28
29
use crate::{AggregateUDF, Expr, GetFieldAccess, ScalarUDF, TableSource, WindowUDF};
30
31
/// Provides the `SQL` query planner with meta-data about tables and
32
/// functions referenced in SQL statements, without a direct dependency on other
33
/// DataFusion structures
34
pub trait ContextProvider {
35
    /// Getter for a datasource
36
    fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>>;
37
38
0
    fn get_file_type(&self, _ext: &str) -> Result<Arc<dyn FileType>> {
39
0
        not_impl_err!("Registered file types are not supported")
40
0
    }
41
42
    /// Getter for a table function
43
0
    fn get_table_function_source(
44
0
        &self,
45
0
        _name: &str,
46
0
        _args: Vec<Expr>,
47
0
    ) -> Result<Arc<dyn TableSource>> {
48
0
        not_impl_err!("Table Functions are not supported")
49
0
    }
50
51
    /// This provides a worktable (an intermediate table that is used to store the results of a CTE during execution)
52
    /// We don't directly implement this in the logical plan's [`SqlToRel`]
53
    /// because the sql code needs access to a table that contains execution-related types that can't be a direct dependency
54
    /// of the sql crate (namely, the `CteWorktable`).
55
    /// The [`ContextProvider`] provides a way to "hide" this dependency.
56
0
    fn create_cte_work_table(
57
0
        &self,
58
0
        _name: &str,
59
0
        _schema: SchemaRef,
60
0
    ) -> Result<Arc<dyn TableSource>> {
61
0
        not_impl_err!("Recursive CTE is not implemented")
62
0
    }
63
64
    /// Getter for expr planners
65
0
    fn get_expr_planners(&self) -> &[Arc<dyn ExprPlanner>] {
66
0
        &[]
67
0
    }
68
69
    /// Getter for a UDF description
70
    fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>>;
71
    /// Getter for a UDAF description
72
    fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>>;
73
    /// Getter for a UDWF
74
    fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>>;
75
    /// Getter for system/user-defined variable type
76
    fn get_variable_type(&self, variable_names: &[String]) -> Option<DataType>;
77
78
    /// Get configuration options
79
    fn options(&self) -> &ConfigOptions;
80
81
    /// Get all user defined scalar function names
82
    fn udf_names(&self) -> Vec<String>;
83
84
    /// Get all user defined aggregate function names
85
    fn udaf_names(&self) -> Vec<String>;
86
87
    /// Get all user defined window function names
88
    fn udwf_names(&self) -> Vec<String>;
89
}
90
91
/// This trait allows users to customize the behavior of the SQL planner
92
pub trait ExprPlanner: Debug + Send + Sync {
93
    /// Plan the binary operation between two expressions, returns original
94
    /// [`RawBinaryExpr`] if not possible
95
0
    fn plan_binary_op(
96
0
        &self,
97
0
        expr: RawBinaryExpr,
98
0
        _schema: &DFSchema,
99
0
    ) -> Result<PlannerResult<RawBinaryExpr>> {
100
0
        Ok(PlannerResult::Original(expr))
101
0
    }
102
103
    /// Plan the field access expression
104
    ///
105
    /// Returns the original [`RawFieldAccessExpr`] if not possible
106
0
    fn plan_field_access(
107
0
        &self,
108
0
        expr: RawFieldAccessExpr,
109
0
        _schema: &DFSchema,
110
0
    ) -> Result<PlannerResult<RawFieldAccessExpr>> {
111
0
        Ok(PlannerResult::Original(expr))
112
0
    }
113
114
    /// Plan the array literal
115
    ///
116
    /// Returns original expression arguments if not possible
117
0
    fn plan_array_literal(
118
0
        &self,
119
0
        exprs: Vec<Expr>,
120
0
        _schema: &DFSchema,
121
0
    ) -> Result<PlannerResult<Vec<Expr>>> {
122
0
        Ok(PlannerResult::Original(exprs))
123
0
    }
124
125
    /// Plan the POSITION expression, e.g., `POSITION(<expr> in <expr>)`.
126
    /// Returns original expression arguments if not possible
127
0
    fn plan_position(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
128
0
        Ok(PlannerResult::Original(args))
129
0
    }
130
131
    /// Plan the dictionary literal `{ key: value, ...}`
132
    ///
133
    /// Returns original expression arguments if not possible
134
0
    fn plan_dictionary_literal(
135
0
        &self,
136
0
        expr: RawDictionaryExpr,
137
0
        _schema: &DFSchema,
138
0
    ) -> Result<PlannerResult<RawDictionaryExpr>> {
139
0
        Ok(PlannerResult::Original(expr))
140
0
    }
141
142
    /// Plan an extract expression, e.g., `EXTRACT(month FROM foo)`
143
    ///
144
    /// Returns original expression arguments if not possible
145
0
    fn plan_extract(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
146
0
        Ok(PlannerResult::Original(args))
147
0
    }
148
149
    /// Plan a substring expression, e.g., `SUBSTRING(<expr> [FROM <expr>] [FOR <expr>])`
150
    ///
151
    /// Returns original expression arguments if not possible
152
0
    fn plan_substring(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
153
0
        Ok(PlannerResult::Original(args))
154
0
    }
155
156
    /// Plans a struct `struct(expression1[, ..., expression_n])`
157
    /// literal based on the given input expressions.
158
    /// This function takes a vector of expressions and a boolean flag indicating whether
159
    /// the struct uses the optional name
160
    ///
161
    /// Returns a `PlannerResult` containing either the planned struct expressions or the original
162
    /// input expressions if planning is not possible.
163
0
    fn plan_struct_literal(
164
0
        &self,
165
0
        args: Vec<Expr>,
166
0
        _is_named_struct: bool,
167
0
    ) -> Result<PlannerResult<Vec<Expr>>> {
168
0
        Ok(PlannerResult::Original(args))
169
0
    }
170
171
    /// Plans an overlay expression, e.g., `overlay(str PLACING substr FROM pos [FOR count])`
172
    ///
173
    /// Returns original expression arguments if not possible
174
0
    fn plan_overlay(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
175
0
        Ok(PlannerResult::Original(args))
176
0
    }
177
178
    /// Plan a make_map expression, e.g., `make_map(key1, value1, key2, value2, ...)`
179
    ///
180
    /// Returns original expression arguments if not possible
181
0
    fn plan_make_map(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
182
0
        Ok(PlannerResult::Original(args))
183
0
    }
184
185
    /// Plans compound identifier, e.g., `db.schema.table`, for non-empty nested names
186
    ///
187
    /// Note:
188
    /// Currently compound identifier for outer query schema is not supported.
189
    ///
190
    /// Returns planned expression
191
0
    fn plan_compound_identifier(
192
0
        &self,
193
0
        _field: &Field,
194
0
        _qualifier: Option<&TableReference>,
195
0
        _nested_names: &[String],
196
0
    ) -> Result<PlannerResult<Vec<Expr>>> {
197
0
        not_impl_err!(
198
0
            "Default planner compound identifier hasn't been implemented for ExprPlanner"
199
0
        )
200
0
    }
201
202
    /// Plans `ANY` expression, e.g., `expr = ANY(array_expr)`
203
    ///
204
    /// Returns original binary expression if not possible
205
0
    fn plan_any(&self, expr: RawBinaryExpr) -> Result<PlannerResult<RawBinaryExpr>> {
206
0
        Ok(PlannerResult::Original(expr))
207
0
    }
208
}
209
210
/// An operator with two arguments to plan
211
///
212
/// Note `left` and `right` are DataFusion [`Expr`]s but the `op` is the SQL AST
213
/// operator.
214
///
215
/// This structure is used by [`ExprPlanner`] to plan operators with
216
/// custom expressions.
217
#[derive(Debug, Clone)]
218
pub struct RawBinaryExpr {
219
    pub op: sqlparser::ast::BinaryOperator,
220
    pub left: Expr,
221
    pub right: Expr,
222
}
223
224
/// An expression with GetFieldAccess to plan
225
///
226
/// This structure is used by [`ExprPlanner`] to plan operators with
227
/// custom expressions.
228
#[derive(Debug, Clone)]
229
pub struct RawFieldAccessExpr {
230
    pub field_access: GetFieldAccess,
231
    pub expr: Expr,
232
}
233
234
/// A Dictionary literal expression `{ key: value, ...}`
235
///
236
/// This structure is used by [`ExprPlanner`] to plan operators with
237
/// custom expressions.
238
#[derive(Debug, Clone)]
239
pub struct RawDictionaryExpr {
240
    pub keys: Vec<Expr>,
241
    pub values: Vec<Expr>,
242
}
243
244
/// Result of planning a raw expr with [`ExprPlanner`]
245
#[derive(Debug, Clone)]
246
pub enum PlannerResult<T> {
247
    /// The raw expression was successfully planned as a new [`Expr`]
248
    Planned(Expr),
249
    /// The raw expression could not be planned, and is returned unmodified
250
    Original(T),
251
}