Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-expr/src/planner.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::sync::Arc;
19
20
use crate::scalar_function;
21
use crate::{
22
    expressions::{self, binary, like, similar_to, Column, Literal},
23
    PhysicalExpr,
24
};
25
26
use arrow::datatypes::Schema;
27
use datafusion_common::{
28
    exec_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, ToDFSchema,
29
};
30
use datafusion_expr::execution_props::ExecutionProps;
31
use datafusion_expr::expr::{Alias, Cast, InList, ScalarFunction};
32
use datafusion_expr::var_provider::is_system_variables;
33
use datafusion_expr::var_provider::VarType;
34
use datafusion_expr::{
35
    binary_expr, lit, Between, BinaryExpr, Expr, Like, Operator, TryCast,
36
};
37
38
/// [PhysicalExpr] evaluate DataFusion expressions such as `A + 1`, or `CAST(c1
39
/// AS int)`.
40
///
41
/// [PhysicalExpr] are the physical counterpart to [Expr] used in logical
42
/// planning, and can be evaluated directly on a [RecordBatch]. They are
43
/// normally created from [Expr] by a [PhysicalPlanner] and can be created
44
/// directly using [create_physical_expr].
45
///
46
/// A Physical expression knows its type, nullability and how to evaluate itself.
47
///
48
/// [PhysicalPlanner]: https://docs.rs/datafusion/latest/datafusion/physical_planner/trait.PhysicalPlanner.html
49
/// [RecordBatch]: https://docs.rs/arrow/latest/arrow/record_batch/struct.RecordBatch.html
50
///
51
/// # Example: Create `PhysicalExpr` from `Expr`
52
/// ```
53
/// # use arrow::datatypes::{DataType, Field, Schema};
54
/// # use datafusion_common::DFSchema;
55
/// # use datafusion_expr::{Expr, col, lit};
56
/// # use datafusion_physical_expr::create_physical_expr;
57
/// # use datafusion_expr::execution_props::ExecutionProps;
58
/// // For a logical expression `a = 1`, we can create a physical expression
59
/// let expr = col("a").eq(lit(1));
60
/// // To create a PhysicalExpr we need 1. a schema
61
/// let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
62
/// let df_schema = DFSchema::try_from(schema).unwrap();
63
/// // 2. ExecutionProps
64
/// let props = ExecutionProps::new();
65
/// // We can now create a PhysicalExpr:
66
/// let physical_expr = create_physical_expr(&expr, &df_schema, &props).unwrap();
67
/// ```
68
///
69
/// # Example: Executing a PhysicalExpr to obtain [ColumnarValue]
70
/// ```
71
/// # use std::sync::Arc;
72
/// # use arrow::array::{cast::AsArray, BooleanArray, Int32Array, RecordBatch};
73
/// # use arrow::datatypes::{DataType, Field, Schema};
74
/// # use datafusion_common::{assert_batches_eq, DFSchema};
75
/// # use datafusion_expr::{Expr, col, lit, ColumnarValue};
76
/// # use datafusion_physical_expr::create_physical_expr;
77
/// # use datafusion_expr::execution_props::ExecutionProps;
78
/// # let expr = col("a").eq(lit(1));
79
/// # let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
80
/// # let df_schema = DFSchema::try_from(schema.clone()).unwrap();
81
/// # let props = ExecutionProps::new();
82
/// // Given a PhysicalExpr, for `a = 1` we can evaluate it against a RecordBatch like this:
83
/// let physical_expr = create_physical_expr(&expr, &df_schema, &props).unwrap();
84
/// // Input of [1,2,3]
85
/// let input_batch = RecordBatch::try_from_iter(vec![
86
///   ("a", Arc::new(Int32Array::from(vec![1, 2, 3])) as _)
87
/// ]).unwrap();
88
/// // The result is a ColumnarValue (either an Array or a Scalar)
89
/// let result = physical_expr.evaluate(&input_batch).unwrap();
90
/// // In this case, a BooleanArray with the result of the comparison
91
/// let ColumnarValue::Array(arr) = result else {
92
///  panic!("Expected an array")
93
/// };
94
/// assert_eq!(arr.as_boolean(), &BooleanArray::from(vec![true, false, false]));
95
/// ```
96
///
97
/// [ColumnarValue]: datafusion_expr::ColumnarValue
98
///
99
/// Create a physical expression from a logical expression ([Expr]).
100
///
101
/// # Arguments
102
///
103
/// * `e` - The logical expression
104
/// * `input_dfschema` - The DataFusion schema for the input, used to resolve `Column` references
105
///                      to qualified or unqualified fields by name.
106
0
pub fn create_physical_expr(
107
0
    e: &Expr,
108
0
    input_dfschema: &DFSchema,
109
0
    execution_props: &ExecutionProps,
110
0
) -> Result<Arc<dyn PhysicalExpr>> {
111
0
    let input_schema: &Schema = &input_dfschema.into();
112
0
113
0
    match e {
114
0
        Expr::Alias(Alias { expr, .. }) => {
115
0
            Ok(create_physical_expr(expr, input_dfschema, execution_props)?)
116
        }
117
0
        Expr::Column(c) => {
118
0
            let idx = input_dfschema.index_of_column(c)?;
119
0
            Ok(Arc::new(Column::new(&c.name, idx)))
120
        }
121
0
        Expr::Literal(value) => Ok(Arc::new(Literal::new(value.clone()))),
122
0
        Expr::ScalarVariable(_, variable_names) => {
123
0
            if is_system_variables(variable_names) {
124
0
                match execution_props.get_var_provider(VarType::System) {
125
0
                    Some(provider) => {
126
0
                        let scalar_value = provider.get_value(variable_names.clone())?;
127
0
                        Ok(Arc::new(Literal::new(scalar_value)))
128
                    }
129
0
                    _ => plan_err!("No system variable provider found"),
130
                }
131
            } else {
132
0
                match execution_props.get_var_provider(VarType::UserDefined) {
133
0
                    Some(provider) => {
134
0
                        let scalar_value = provider.get_value(variable_names.clone())?;
135
0
                        Ok(Arc::new(Literal::new(scalar_value)))
136
                    }
137
0
                    _ => plan_err!("No user defined variable provider found"),
138
                }
139
            }
140
        }
141
0
        Expr::IsTrue(expr) => {
142
0
            let binary_op = binary_expr(
143
0
                expr.as_ref().clone(),
144
0
                Operator::IsNotDistinctFrom,
145
0
                lit(true),
146
0
            );
147
0
            create_physical_expr(&binary_op, input_dfschema, execution_props)
148
        }
149
0
        Expr::IsNotTrue(expr) => {
150
0
            let binary_op =
151
0
                binary_expr(expr.as_ref().clone(), Operator::IsDistinctFrom, lit(true));
152
0
            create_physical_expr(&binary_op, input_dfschema, execution_props)
153
        }
154
0
        Expr::IsFalse(expr) => {
155
0
            let binary_op = binary_expr(
156
0
                expr.as_ref().clone(),
157
0
                Operator::IsNotDistinctFrom,
158
0
                lit(false),
159
0
            );
160
0
            create_physical_expr(&binary_op, input_dfschema, execution_props)
161
        }
162
0
        Expr::IsNotFalse(expr) => {
163
0
            let binary_op =
164
0
                binary_expr(expr.as_ref().clone(), Operator::IsDistinctFrom, lit(false));
165
0
            create_physical_expr(&binary_op, input_dfschema, execution_props)
166
        }
167
0
        Expr::IsUnknown(expr) => {
168
0
            let binary_op = binary_expr(
169
0
                expr.as_ref().clone(),
170
0
                Operator::IsNotDistinctFrom,
171
0
                Expr::Literal(ScalarValue::Boolean(None)),
172
0
            );
173
0
            create_physical_expr(&binary_op, input_dfschema, execution_props)
174
        }
175
0
        Expr::IsNotUnknown(expr) => {
176
0
            let binary_op = binary_expr(
177
0
                expr.as_ref().clone(),
178
0
                Operator::IsDistinctFrom,
179
0
                Expr::Literal(ScalarValue::Boolean(None)),
180
0
            );
181
0
            create_physical_expr(&binary_op, input_dfschema, execution_props)
182
        }
183
0
        Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
184
            // Create physical expressions for left and right operands
185
0
            let lhs = create_physical_expr(left, input_dfschema, execution_props)?;
186
0
            let rhs = create_physical_expr(right, input_dfschema, execution_props)?;
187
            // Note that the logical planner is responsible
188
            // for type coercion on the arguments (e.g. if one
189
            // argument was originally Int32 and one was
190
            // Int64 they will both be coerced to Int64).
191
            //
192
            // There should be no coercion during physical
193
            // planning.
194
0
            binary(lhs, *op, rhs, input_schema)
195
        }
196
        Expr::Like(Like {
197
0
            negated,
198
0
            expr,
199
0
            pattern,
200
0
            escape_char,
201
0
            case_insensitive,
202
0
        }) => {
203
0
            if escape_char.is_some() {
204
0
                return exec_err!("LIKE does not support escape_char");
205
0
            }
206
0
            let physical_expr =
207
0
                create_physical_expr(expr, input_dfschema, execution_props)?;
208
0
            let physical_pattern =
209
0
                create_physical_expr(pattern, input_dfschema, execution_props)?;
210
0
            like(
211
0
                *negated,
212
0
                *case_insensitive,
213
0
                physical_expr,
214
0
                physical_pattern,
215
0
                input_schema,
216
0
            )
217
        }
218
        Expr::SimilarTo(Like {
219
0
            negated,
220
0
            expr,
221
0
            pattern,
222
0
            escape_char,
223
0
            case_insensitive,
224
0
        }) => {
225
0
            if escape_char.is_some() {
226
0
                return exec_err!("SIMILAR TO does not support escape_char yet");
227
0
            }
228
0
            let physical_expr =
229
0
                create_physical_expr(expr, input_dfschema, execution_props)?;
230
0
            let physical_pattern =
231
0
                create_physical_expr(pattern, input_dfschema, execution_props)?;
232
0
            similar_to(*negated, *case_insensitive, physical_expr, physical_pattern)
233
        }
234
0
        Expr::Case(case) => {
235
0
            let expr: Option<Arc<dyn PhysicalExpr>> = if let Some(e) = &case.expr {
236
0
                Some(create_physical_expr(
237
0
                    e.as_ref(),
238
0
                    input_dfschema,
239
0
                    execution_props,
240
0
                )?)
241
            } else {
242
0
                None
243
            };
244
0
            let (when_expr, then_expr): (Vec<&Expr>, Vec<&Expr>) = case
245
0
                .when_then_expr
246
0
                .iter()
247
0
                .map(|(w, t)| (w.as_ref(), t.as_ref()))
248
0
                .unzip();
249
0
            let when_expr =
250
0
                create_physical_exprs(when_expr, input_dfschema, execution_props)?;
251
0
            let then_expr =
252
0
                create_physical_exprs(then_expr, input_dfschema, execution_props)?;
253
0
            let when_then_expr: Vec<(Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>)> =
254
0
                when_expr
255
0
                    .iter()
256
0
                    .zip(then_expr.iter())
257
0
                    .map(|(w, t)| (Arc::clone(w), Arc::clone(t)))
258
0
                    .collect();
259
0
            let else_expr: Option<Arc<dyn PhysicalExpr>> =
260
0
                if let Some(e) = &case.else_expr {
261
0
                    Some(create_physical_expr(
262
0
                        e.as_ref(),
263
0
                        input_dfschema,
264
0
                        execution_props,
265
0
                    )?)
266
                } else {
267
0
                    None
268
                };
269
0
            Ok(expressions::case(expr, when_then_expr, else_expr)?)
270
        }
271
0
        Expr::Cast(Cast { expr, data_type }) => expressions::cast(
272
0
            create_physical_expr(expr, input_dfschema, execution_props)?,
273
0
            input_schema,
274
0
            data_type.clone(),
275
        ),
276
0
        Expr::TryCast(TryCast { expr, data_type }) => expressions::try_cast(
277
0
            create_physical_expr(expr, input_dfschema, execution_props)?,
278
0
            input_schema,
279
0
            data_type.clone(),
280
        ),
281
0
        Expr::Not(expr) => {
282
0
            expressions::not(create_physical_expr(expr, input_dfschema, execution_props)?)
283
        }
284
0
        Expr::Negative(expr) => expressions::negative(
285
0
            create_physical_expr(expr, input_dfschema, execution_props)?,
286
0
            input_schema,
287
        ),
288
0
        Expr::IsNull(expr) => expressions::is_null(create_physical_expr(
289
0
            expr,
290
0
            input_dfschema,
291
0
            execution_props,
292
0
        )?),
293
0
        Expr::IsNotNull(expr) => expressions::is_not_null(create_physical_expr(
294
0
            expr,
295
0
            input_dfschema,
296
0
            execution_props,
297
0
        )?),
298
0
        Expr::ScalarFunction(ScalarFunction { func, args }) => {
299
0
            let physical_args =
300
0
                create_physical_exprs(args, input_dfschema, execution_props)?;
301
302
0
            scalar_function::create_physical_expr(
303
0
                Arc::clone(func).as_ref(),
304
0
                &physical_args,
305
0
                input_schema,
306
0
                args,
307
0
                input_dfschema,
308
0
            )
309
        }
310
        Expr::Between(Between {
311
0
            expr,
312
0
            negated,
313
0
            low,
314
0
            high,
315
        }) => {
316
0
            let value_expr = create_physical_expr(expr, input_dfschema, execution_props)?;
317
0
            let low_expr = create_physical_expr(low, input_dfschema, execution_props)?;
318
0
            let high_expr = create_physical_expr(high, input_dfschema, execution_props)?;
319
320
            // rewrite the between into the two binary operators
321
0
            let binary_expr = binary(
322
0
                binary(
323
0
                    Arc::clone(&value_expr),
324
0
                    Operator::GtEq,
325
0
                    low_expr,
326
0
                    input_schema,
327
0
                )?,
328
0
                Operator::And,
329
0
                binary(
330
0
                    Arc::clone(&value_expr),
331
0
                    Operator::LtEq,
332
0
                    high_expr,
333
0
                    input_schema,
334
0
                )?,
335
0
                input_schema,
336
0
            );
337
0
338
0
            if *negated {
339
0
                expressions::not(binary_expr?)
340
            } else {
341
0
                binary_expr
342
            }
343
        }
344
        Expr::InList(InList {
345
0
            expr,
346
0
            list,
347
0
            negated,
348
0
        }) => match expr.as_ref() {
349
            Expr::Literal(ScalarValue::Utf8(None)) => {
350
0
                Ok(expressions::lit(ScalarValue::Boolean(None)))
351
            }
352
            _ => {
353
0
                let value_expr =
354
0
                    create_physical_expr(expr, input_dfschema, execution_props)?;
355
356
0
                let list_exprs =
357
0
                    create_physical_exprs(list, input_dfschema, execution_props)?;
358
0
                expressions::in_list(value_expr, list_exprs, negated, input_schema)
359
            }
360
        },
361
0
        other => {
362
0
            not_impl_err!("Physical plan does not support logical expression {other:?}")
363
        }
364
    }
365
0
}
366
367
/// Create vector of Physical Expression from a vector of logical expression
368
0
pub fn create_physical_exprs<'a, I>(
369
0
    exprs: I,
370
0
    input_dfschema: &DFSchema,
371
0
    execution_props: &ExecutionProps,
372
0
) -> Result<Vec<Arc<dyn PhysicalExpr>>>
373
0
where
374
0
    I: IntoIterator<Item = &'a Expr>,
375
0
{
376
0
    exprs
377
0
        .into_iter()
378
0
        .map(|expr| create_physical_expr(expr, input_dfschema, execution_props))
379
0
        .collect::<Result<Vec<_>>>()
380
0
}
381
382
/// Convert a logical expression to a physical expression (without any simplification, etc)
383
0
pub fn logical2physical(expr: &Expr, schema: &Schema) -> Arc<dyn PhysicalExpr> {
384
0
    let df_schema = schema.clone().to_dfschema().unwrap();
385
0
    let execution_props = ExecutionProps::new();
386
0
    create_physical_expr(expr, &df_schema, &execution_props).unwrap()
387
0
}
388
389
#[cfg(test)]
mod tests {
    use arrow_array::{ArrayRef, BooleanArray, RecordBatch, StringArray};
    use arrow_schema::{DataType, Field};

    use datafusion_expr::{col, lit};

    use super::*;

    /// Plans `letter = 'A'` against a qualified schema and checks the result
    /// of evaluating it on a four-row batch.
    #[test]
    fn test_create_physical_expr_scalar_input_output() -> Result<()> {
        // Schema with a single non-nullable Utf8 column, qualified as `data`.
        let schema = Schema::new(vec![Field::new("letter", DataType::Utf8, false)]);
        let df_schema = DFSchema::try_from_qualified_schema("data", &schema)?;

        let predicate = col("letter").eq(lit("A"));
        let physical =
            create_physical_expr(&predicate, &df_schema, &ExecutionProps::new())?;

        let letters: ArrayRef =
            Arc::new(StringArray::from_iter_values(vec!["A", "B", "C", "D"]));
        let batch = RecordBatch::try_new(Arc::new(schema), vec![letters])?;

        let actual = physical
            .evaluate(&batch)?
            .into_array(4)
            .expect("Failed to convert to array");
        let expected: ArrayRef =
            Arc::new(BooleanArray::from(vec![true, false, false, false]));

        assert_eq!(&actual, &expected);

        Ok(())
    }
}