/Users/andrewlamb/Software/datafusion/datafusion/physical-expr-common/src/physical_expr.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use std::any::Any; |
19 | | use std::fmt::{Debug, Display, Formatter}; |
20 | | use std::hash::{Hash, Hasher}; |
21 | | use std::sync::Arc; |
22 | | |
23 | | use crate::utils::scatter; |
24 | | |
25 | | use arrow::array::BooleanArray; |
26 | | use arrow::compute::filter_record_batch; |
27 | | use arrow::datatypes::{DataType, Schema}; |
28 | | use arrow::record_batch::RecordBatch; |
29 | | use datafusion_common::{internal_err, not_impl_err, Result}; |
30 | | use datafusion_expr_common::columnar_value::ColumnarValue; |
31 | | use datafusion_expr_common::interval_arithmetic::Interval; |
32 | | use datafusion_expr_common::sort_properties::ExprProperties; |
33 | | |
34 | | /// [`PhysicalExpr`]s represent expressions such as `A + 1` or `CAST(c1 AS int)`. |
35 | | /// |
36 | | /// `PhysicalExpr` knows its type, nullability and can be evaluated directly on |
37 | | /// a [`RecordBatch`] (see [`Self::evaluate`]). |
38 | | /// |
39 | | /// `PhysicalExpr` are the physical counterpart to [`Expr`] used in logical |
40 | | /// planning. They are typically created from [`Expr`] by a [`PhysicalPlanner`] |
41 | | /// invoked from a higher level API |
42 | | /// |
43 | | /// Some important examples of `PhysicalExpr` are: |
44 | | /// * [`Column`]: Represents a column at a given index in a RecordBatch |
45 | | /// |
46 | | /// To create `PhysicalExpr` from `Expr`, see |
47 | | /// * [`SessionContext::create_physical_expr`]: A high level API |
48 | | /// * [`create_physical_expr`]: A low level API |
49 | | /// |
50 | | /// [`SessionContext::create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.create_physical_expr |
51 | | /// [`PhysicalPlanner`]: https://docs.rs/datafusion/latest/datafusion/physical_planner/trait.PhysicalPlanner.html |
52 | | /// [`Expr`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html |
53 | | /// [`create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/fn.create_physical_expr.html |
54 | | /// [`Column`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/expressions/struct.Column.html |
55 | | pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq<dyn Any> { |
56 | | /// Returns the physical expression as [`Any`] so that it can be |
57 | | /// downcast to a specific implementation. |
58 | | fn as_any(&self) -> &dyn Any; |
59 | | /// Get the data type of this expression, given the schema of the input |
60 | | fn data_type(&self, input_schema: &Schema) -> Result<DataType>; |
61 | | /// Determine whether this expression is nullable, given the schema of the input |
62 | | fn nullable(&self, input_schema: &Schema) -> Result<bool>; |
63 | | /// Evaluate an expression against a RecordBatch |
64 | | fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue>; |
65 | | /// Evaluate an expression against a RecordBatch after first applying a |
66 | | /// validity array |
67 | 0 | fn evaluate_selection( |
68 | 0 | &self, |
69 | 0 | batch: &RecordBatch, |
70 | 0 | selection: &BooleanArray, |
71 | 0 | ) -> Result<ColumnarValue> { |
72 | 0 | let tmp_batch = filter_record_batch(batch, selection)?; |
73 | | |
74 | 0 | let tmp_result = self.evaluate(&tmp_batch)?; |
75 | | |
76 | 0 | if batch.num_rows() == tmp_batch.num_rows() { |
77 | | // All values from the `selection` filter are true. |
78 | 0 | Ok(tmp_result) |
79 | 0 | } else if let ColumnarValue::Array(a) = tmp_result { |
80 | 0 | scatter(selection, a.as_ref()).map(ColumnarValue::Array) |
81 | | } else { |
82 | 0 | Ok(tmp_result) |
83 | | } |
84 | 0 | } |
85 | | |
86 | | /// Get a list of child PhysicalExpr that provide the input for this expr. |
87 | | fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>>; |
88 | | |
89 | | /// Returns a new PhysicalExpr where all children were replaced by new exprs. |
90 | | fn with_new_children( |
91 | | self: Arc<Self>, |
92 | | children: Vec<Arc<dyn PhysicalExpr>>, |
93 | | ) -> Result<Arc<dyn PhysicalExpr>>; |
94 | | |
95 | | /// Computes the output interval for the expression, given the input |
96 | | /// intervals. |
97 | | /// |
98 | | /// # Arguments |
99 | | /// |
100 | | /// * `children` are the intervals for the children (inputs) of this |
101 | | /// expression. |
102 | | /// |
103 | | /// # Example |
104 | | /// |
105 | | /// If the expression is `a + b`, and the input intervals are `a: [1, 2]` |
106 | | /// and `b: [3, 4]`, then the output interval would be `[4, 6]`. |
107 | 0 | fn evaluate_bounds(&self, _children: &[&Interval]) -> Result<Interval> { |
108 | 0 | not_impl_err!("Not implemented for {self}") |
109 | 0 | } |
110 | | |
111 | | /// Updates bounds for child expressions, given a known interval for this |
112 | | /// expression. |
113 | | /// |
114 | | /// This is used to propagate constraints down through an expression tree. |
115 | | /// |
116 | | /// # Arguments |
117 | | /// |
118 | | /// * `interval` is the currently known interval for this expression. |
119 | | /// * `children` are the current intervals for the children of this expression. |
120 | | /// |
121 | | /// # Returns |
122 | | /// |
123 | | /// A `Vec` of new intervals for the children, in order. |
124 | | /// |
125 | | /// If constraint propagation reveals an infeasibility for any child, returns |
126 | | /// [`None`]. If none of the children intervals change as a result of propagation, |
127 | | /// may return an empty vector instead of cloning `children`. This is the default |
128 | | /// (and conservative) return value. |
129 | | /// |
130 | | /// # Example |
131 | | /// |
132 | | /// If the expression is `a + b`, the current `interval` is `[4, 5]` and the |
133 | | /// inputs `a` and `b` are respectively given as `[0, 2]` and `[-∞, 4]`, then |
134 | | /// propagation would return `[0, 2]` and `[2, 4]` as `b` must be at least |
135 | | /// `2` to make the output at least `4`. |
136 | 0 | fn propagate_constraints( |
137 | 0 | &self, |
138 | 0 | _interval: &Interval, |
139 | 0 | _children: &[&Interval], |
140 | 0 | ) -> Result<Option<Vec<Interval>>> { |
141 | 0 | Ok(Some(vec![])) |
142 | 0 | } |
143 | | |
144 | | /// Update the hash `state` with this expression requirements from |
145 | | /// [`Hash`]. |
146 | | /// |
147 | | /// This method is required to support hashing [`PhysicalExpr`]s. To |
148 | | /// implement it, typically the type implementing |
149 | | /// [`PhysicalExpr`] implements [`Hash`] and |
150 | | /// then the following boiler plate is used: |
151 | | /// |
152 | | /// # Example: |
153 | | /// ``` |
154 | | /// // User defined expression that derives Hash |
155 | | /// #[derive(Hash, Debug, PartialEq, Eq)] |
156 | | /// struct MyExpr { |
157 | | /// val: u64 |
158 | | /// } |
159 | | /// |
160 | | /// // impl PhysicalExpr { |
161 | | /// // ... |
162 | | /// # impl MyExpr { |
163 | | /// // Boiler plate to call the derived Hash impl |
164 | | /// fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) { |
165 | | /// use std::hash::Hash; |
166 | | /// let mut s = state; |
167 | | /// self.hash(&mut s); |
168 | | /// } |
169 | | /// // } |
170 | | /// # } |
171 | | /// ``` |
172 | | /// Note: [`PhysicalExpr`] is not constrained by [`Hash`] |
173 | | /// directly because it must remain object safe. |
174 | | fn dyn_hash(&self, _state: &mut dyn Hasher); |
175 | | |
176 | | /// Calculates the properties of this [`PhysicalExpr`] based on its |
177 | | /// children's properties (i.e. order and range), recursively aggregating |
178 | | /// the information from its children. In cases where the [`PhysicalExpr`] |
179 | | /// has no children (e.g., `Literal` or `Column`), these properties should |
180 | | /// be specified externally, as the function defaults to unknown properties. |
181 | 0 | fn get_properties(&self, _children: &[ExprProperties]) -> Result<ExprProperties> { |
182 | 0 | Ok(ExprProperties::new_unknown()) |
183 | 0 | } |
184 | | } |
185 | | |
186 | | impl Hash for dyn PhysicalExpr { |
187 | 20 | fn hash<H: Hasher>(&self, state: &mut H) { |
188 | 20 | self.dyn_hash(state); |
189 | 20 | } |
190 | | } |
191 | | |
192 | | /// Returns a copy of this expr if we change any child according to the pointer comparison. |
193 | | /// The size of `children` must be equal to the size of `PhysicalExpr::children()`. |
194 | 8.17k | pub fn with_new_children_if_necessary( |
195 | 8.17k | expr: Arc<dyn PhysicalExpr>, |
196 | 8.17k | children: Vec<Arc<dyn PhysicalExpr>>, |
197 | 8.17k | ) -> Result<Arc<dyn PhysicalExpr>> { |
198 | 8.17k | let old_children = expr.children(); |
199 | 8.17k | if children.len() != old_children.len() { |
200 | 0 | internal_err!("PhysicalExpr: Wrong number of children") |
201 | 8.17k | } else if children.is_empty() |
202 | 8.17k | || children |
203 | 8.17k | .iter() |
204 | 8.17k | .zip(old_children.iter()) |
205 | 15.9k | .any(|(c1, c2)| !Arc::ptr_eq(c1, c2))8.17k |
206 | | { |
207 | 66 | Ok(expr.with_new_children(children)?0 ) |
208 | | } else { |
209 | 8.10k | Ok(expr) |
210 | | } |
211 | 8.17k | } |
212 | | |
213 | 175k | pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any { |
214 | 175k | if any.is::<Arc<dyn PhysicalExpr>>() { |
215 | 175k | any.downcast_ref::<Arc<dyn PhysicalExpr>>() |
216 | 175k | .unwrap() |
217 | 175k | .as_any() |
218 | 0 | } else if any.is::<Box<dyn PhysicalExpr>>() { |
219 | 0 | any.downcast_ref::<Box<dyn PhysicalExpr>>() |
220 | 0 | .unwrap() |
221 | 0 | .as_any() |
222 | | } else { |
223 | 0 | any |
224 | | } |
225 | 175k | } |
226 | | |
227 | | /// Returns [`Display`] able a list of [`PhysicalExpr`] |
228 | | /// |
229 | | /// Example output: `[a + 1, b]` |
230 | 0 | pub fn format_physical_expr_list(exprs: &[Arc<dyn PhysicalExpr>]) -> impl Display + '_ { |
231 | | struct DisplayWrapper<'a>(&'a [Arc<dyn PhysicalExpr>]); |
232 | | impl<'a> Display for DisplayWrapper<'a> { |
233 | 0 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
234 | 0 | let mut iter = self.0.iter(); |
235 | 0 | write!(f, "[")?; |
236 | 0 | if let Some(expr) = iter.next() { |
237 | 0 | write!(f, "{}", expr)?; |
238 | 0 | } |
239 | 0 | for expr in iter { |
240 | 0 | write!(f, ", {}", expr)?; |
241 | | } |
242 | 0 | write!(f, "]")?; |
243 | 0 | Ok(()) |
244 | 0 | } |
245 | | } |
246 | 0 | DisplayWrapper(exprs) |
247 | 0 | } |