Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-expr-common/src/physical_expr.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::any::Any;
19
use std::fmt::{Debug, Display, Formatter};
20
use std::hash::{Hash, Hasher};
21
use std::sync::Arc;
22
23
use crate::utils::scatter;
24
25
use arrow::array::BooleanArray;
26
use arrow::compute::filter_record_batch;
27
use arrow::datatypes::{DataType, Schema};
28
use arrow::record_batch::RecordBatch;
29
use datafusion_common::{internal_err, not_impl_err, Result};
30
use datafusion_expr_common::columnar_value::ColumnarValue;
31
use datafusion_expr_common::interval_arithmetic::Interval;
32
use datafusion_expr_common::sort_properties::ExprProperties;
33
34
/// [`PhysicalExpr`]s represent expressions such as `A + 1` or `CAST(c1 AS int)`.
35
///
36
/// `PhysicalExpr` knows its type, nullability and can be evaluated directly on
37
/// a [`RecordBatch`] (see [`Self::evaluate`]).
38
///
39
/// `PhysicalExpr` are the physical counterpart to [`Expr`] used in logical
40
/// planning. They are typically created from [`Expr`] by a [`PhysicalPlanner`]
41
/// invoked from a higher-level API.
42
///
43
/// Some important examples of `PhysicalExpr` are:
44
/// * [`Column`]: Represents a column at a given index in a RecordBatch
45
///
46
/// To create a `PhysicalExpr` from an `Expr`, see:
47
/// * [`SessionContext::create_physical_expr`]: A high level API
48
/// * [`create_physical_expr`]: A low level API
49
///
50
/// [`SessionContext::create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.create_physical_expr
51
/// [`PhysicalPlanner`]: https://docs.rs/datafusion/latest/datafusion/physical_planner/trait.PhysicalPlanner.html
52
/// [`Expr`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html
53
/// [`create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/fn.create_physical_expr.html
54
/// [`Column`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/expressions/struct.Column.html
55
pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq<dyn Any> {
56
    /// Returns the physical expression as [`Any`] so that it can be
57
    /// downcast to a specific implementation.
58
    fn as_any(&self) -> &dyn Any;
59
    /// Get the data type of this expression, given the schema of the input
60
    fn data_type(&self, input_schema: &Schema) -> Result<DataType>;
61
    /// Determine whether this expression is nullable, given the schema of the input
62
    fn nullable(&self, input_schema: &Schema) -> Result<bool>;
63
    /// Evaluate an expression against a RecordBatch
64
    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue>;
65
    /// Evaluate an expression against a RecordBatch after first applying a
66
    /// boolean selection mask (only rows where `selection` is true are evaluated)
67
0
    fn evaluate_selection(
68
0
        &self,
69
0
        batch: &RecordBatch,
70
0
        selection: &BooleanArray,
71
0
    ) -> Result<ColumnarValue> {
72
0
        let tmp_batch = filter_record_batch(batch, selection)?;
73
74
0
        let tmp_result = self.evaluate(&tmp_batch)?;
75
76
0
        if batch.num_rows() == tmp_batch.num_rows() {
77
            // All values from the `selection` filter are true.
78
0
            Ok(tmp_result)
79
0
        } else if let ColumnarValue::Array(a) = tmp_result {
80
0
            scatter(selection, a.as_ref()).map(ColumnarValue::Array)
81
        } else {
82
0
            Ok(tmp_result)
83
        }
84
0
    }
85
86
    /// Get a list of child PhysicalExpr that provide the input for this expr.
87
    fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>>;
88
89
    /// Returns a new PhysicalExpr where all children were replaced by new exprs.
90
    fn with_new_children(
91
        self: Arc<Self>,
92
        children: Vec<Arc<dyn PhysicalExpr>>,
93
    ) -> Result<Arc<dyn PhysicalExpr>>;
94
95
    /// Computes the output interval for the expression, given the input
96
    /// intervals.
97
    ///
98
    /// # Arguments
99
    ///
100
    /// * `children` are the intervals for the children (inputs) of this
101
    ///   expression.
102
    ///
103
    /// # Example
104
    ///
105
    /// If the expression is `a + b`, and the input intervals are `a: [1, 2]`
106
    /// and `b: [3, 4]`, then the output interval would be `[4, 6]`.
107
0
    fn evaluate_bounds(&self, _children: &[&Interval]) -> Result<Interval> {
108
0
        not_impl_err!("Not implemented for {self}")
109
0
    }
110
111
    /// Updates bounds for child expressions, given a known interval for this
112
    /// expression.
113
    ///
114
    /// This is used to propagate constraints down through an expression tree.
115
    ///
116
    /// # Arguments
117
    ///
118
    /// * `interval` is the currently known interval for this expression.
119
    /// * `children` are the current intervals for the children of this expression.
120
    ///
121
    /// # Returns
122
    ///
123
    /// A `Vec` of new intervals for the children, in order.
124
    ///
125
    /// If constraint propagation reveals an infeasibility for any child, returns
126
    /// [`None`]. If none of the children intervals change as a result of propagation,
127
    /// may return an empty vector instead of cloning `children`. This is the default
128
    /// (and conservative) return value.
129
    ///
130
    /// # Example
131
    ///
132
    /// If the expression is `a + b`, the current `interval` is `[4, 5]` and the
133
    /// inputs `a` and `b` are respectively given as `[0, 2]` and `[-∞, 4]`, then
134
    /// propagation would return `[0, 2]` and `[2, 4]` as `b` must be at least
135
    /// `2` to make the output at least `4`.
136
0
    fn propagate_constraints(
137
0
        &self,
138
0
        _interval: &Interval,
139
0
        _children: &[&Interval],
140
0
    ) -> Result<Option<Vec<Interval>>> {
141
0
        Ok(Some(vec![]))
142
0
    }
143
144
    /// Update the hash `state` with this expression's requirements from
145
    /// [`Hash`].
146
    ///
147
    /// This method is required to support hashing [`PhysicalExpr`]s.  To
148
    /// implement it, typically the type implementing
149
    /// [`PhysicalExpr`] implements [`Hash`] and
150
    /// then the following boilerplate is used:
151
    ///
152
    /// # Example:
153
    /// ```
154
    /// // User defined expression that derives Hash
155
    /// #[derive(Hash, Debug, PartialEq, Eq)]
156
    /// struct MyExpr {
157
    ///   val: u64
158
    /// }
159
    ///
160
    /// // impl PhysicalExpr {
161
    /// // ...
162
    /// # impl MyExpr {
163
    ///   // Boilerplate to call the derived Hash impl
164
    ///   fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) {
165
    ///     use std::hash::Hash;
166
    ///     let mut s = state;
167
    ///     self.hash(&mut s);
168
    ///   }
169
    /// // }
170
    /// # }
171
    /// ```
172
    /// Note: [`PhysicalExpr`] is not constrained by [`Hash`]
173
    /// directly because it must remain object safe.
174
    fn dyn_hash(&self, _state: &mut dyn Hasher);
175
176
    /// Calculates the properties of this [`PhysicalExpr`] based on its
177
    /// children's properties (i.e. order and range), recursively aggregating
178
    /// the information from its children. In cases where the [`PhysicalExpr`]
179
    /// has no children (e.g., `Literal` or `Column`), these properties should
180
    /// be specified externally, as the function defaults to unknown properties.
181
0
    fn get_properties(&self, _children: &[ExprProperties]) -> Result<ExprProperties> {
182
0
        Ok(ExprProperties::new_unknown())
183
0
    }
184
}
185
186
impl Hash for dyn PhysicalExpr {
187
20
    fn hash<H: Hasher>(&self, state: &mut H) {
188
20
        self.dyn_hash(state);
189
20
    }
190
}
191
192
/// Returns `expr` rebuilt via `with_new_children` when `children` is empty or any new child is not pointer-equal to the corresponding current child; otherwise returns `expr` unchanged.
193
/// The size of `children` must be equal to the size of `PhysicalExpr::children()`.
194
8.17k
pub fn with_new_children_if_necessary(
195
8.17k
    expr: Arc<dyn PhysicalExpr>,
196
8.17k
    children: Vec<Arc<dyn PhysicalExpr>>,
197
8.17k
) -> Result<Arc<dyn PhysicalExpr>> {
198
8.17k
    let old_children = expr.children();
199
8.17k
    if children.len() != old_children.len() {
200
0
        internal_err!("PhysicalExpr: Wrong number of children")
201
8.17k
    } else if children.is_empty()
202
8.17k
        || children
203
8.17k
            .iter()
204
8.17k
            .zip(old_children.iter())
205
15.9k
            .any(|(c1, c2)| !Arc::ptr_eq(c1, c2)
)8.17k
206
    {
207
66
        Ok(expr.with_new_children(children)
?0
)
208
    } else {
209
8.10k
        Ok(expr)
210
    }
211
8.17k
}
212
213
175k
pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any {
214
175k
    if any.is::<Arc<dyn PhysicalExpr>>() {
215
175k
        any.downcast_ref::<Arc<dyn PhysicalExpr>>()
216
175k
            .unwrap()
217
175k
            .as_any()
218
0
    } else if any.is::<Box<dyn PhysicalExpr>>() {
219
0
        any.downcast_ref::<Box<dyn PhysicalExpr>>()
220
0
            .unwrap()
221
0
            .as_any()
222
    } else {
223
0
        any
224
    }
225
175k
}
226
227
/// Returns a value implementing [`Display`] that formats a list of [`PhysicalExpr`]s
228
///
229
/// Example output: `[a + 1, b]`
230
0
pub fn format_physical_expr_list(exprs: &[Arc<dyn PhysicalExpr>]) -> impl Display + '_ {
231
    struct DisplayWrapper<'a>(&'a [Arc<dyn PhysicalExpr>]);
232
    impl<'a> Display for DisplayWrapper<'a> {
233
0
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
234
0
            let mut iter = self.0.iter();
235
0
            write!(f, "[")?;
236
0
            if let Some(expr) = iter.next() {
237
0
                write!(f, "{}", expr)?;
238
0
            }
239
0
            for expr in iter {
240
0
                write!(f, ", {}", expr)?;
241
            }
242
0
            write!(f, "]")?;
243
0
            Ok(())
244
0
        }
245
    }
246
0
    DisplayWrapper(exprs)
247
0
}