/Users/andrewlamb/Software/datafusion/datafusion/physical-expr/src/physical_expr.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use std::sync::Arc; |
19 | | |
20 | | pub(crate) use datafusion_physical_expr_common::physical_expr::PhysicalExpr; |
21 | | use itertools::izip; |
22 | | |
23 | | pub use datafusion_physical_expr_common::physical_expr::down_cast_any_ref; |
24 | | |
25 | | /// Shared [`PhysicalExpr`]. |
26 | | pub type PhysicalExprRef = Arc<dyn PhysicalExpr>; |
27 | | |
28 | | /// This function is similar to the `contains` method of `Vec`. It finds |
29 | | /// whether `expr` is among `physical_exprs`. |
30 | 4.14k | pub fn physical_exprs_contains( |
31 | 4.14k | physical_exprs: &[Arc<dyn PhysicalExpr>], |
32 | 4.14k | expr: &Arc<dyn PhysicalExpr>, |
33 | 4.14k | ) -> bool { |
34 | 4.14k | physical_exprs |
35 | 4.14k | .iter() |
36 | 4.14k | .any(|physical_expr| physical_expr.eq(expr)4.08k ) |
37 | 4.14k | } |
38 | | |
39 | | /// Checks whether the given physical expression slices are equal. |
40 | 0 | pub fn physical_exprs_equal( |
41 | 0 | lhs: &[Arc<dyn PhysicalExpr>], |
42 | 0 | rhs: &[Arc<dyn PhysicalExpr>], |
43 | 0 | ) -> bool { |
44 | 0 | lhs.len() == rhs.len() && izip!(lhs, rhs).all(|(lhs, rhs)| lhs.eq(rhs)) |
45 | 0 | } |
46 | | |
47 | | /// Checks whether the given physical expression slices are equal in the sense |
48 | | /// of bags (multi-sets), disregarding their orderings. |
49 | 0 | pub fn physical_exprs_bag_equal( |
50 | 0 | lhs: &[Arc<dyn PhysicalExpr>], |
51 | 0 | rhs: &[Arc<dyn PhysicalExpr>], |
52 | 0 | ) -> bool { |
53 | 0 | // TODO: Once we can use `HashMap`s with `Arc<dyn PhysicalExpr>`, this |
54 | 0 | // function should use a `HashMap` to reduce computational complexity. |
55 | 0 | if lhs.len() == rhs.len() { |
56 | 0 | let mut rhs_vec = rhs.to_vec(); |
57 | 0 | for expr in lhs { |
58 | 0 | if let Some(idx) = rhs_vec.iter().position(|e| expr.eq(e)) { |
59 | 0 | rhs_vec.swap_remove(idx); |
60 | 0 | } else { |
61 | 0 | return false; |
62 | | } |
63 | | } |
64 | 0 | true |
65 | | } else { |
66 | 0 | false |
67 | | } |
68 | 0 | } |
69 | | |
70 | | /// This utility function removes duplicates from the given `exprs` vector. |
71 | | /// Note that this function does not necessarily preserve its input ordering. |
72 | 183 | pub fn deduplicate_physical_exprs(exprs: &mut Vec<Arc<dyn PhysicalExpr>>) { |
73 | 183 | // TODO: Once we can use `HashSet`s with `Arc<dyn PhysicalExpr>`, this |
74 | 183 | // function should use a `HashSet` to reduce computational complexity. |
75 | 183 | // See issue: https://github.com/apache/datafusion/issues/8027 |
76 | 183 | let mut idx = 0; |
77 | 549 | while idx < exprs.len() { |
78 | 366 | let mut rest_idx = idx + 1; |
79 | 549 | while rest_idx < exprs.len() { |
80 | 183 | if exprs[idx].eq(&exprs[rest_idx]) { |
81 | 0 | exprs.swap_remove(rest_idx); |
82 | 183 | } else { |
83 | 183 | rest_idx += 1; |
84 | 183 | } |
85 | | } |
86 | 366 | idx += 1; |
87 | | } |
88 | 183 | } |
89 | | |
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use crate::expressions::{Column, Literal};
    use crate::physical_expr::{
        deduplicate_physical_exprs, physical_exprs_bag_equal, physical_exprs_contains,
        physical_exprs_equal, PhysicalExpr,
    };

    use datafusion_common::ScalarValue;

    /// Builds a boolean literal expression.
    fn lit_bool(value: bool) -> Arc<dyn PhysicalExpr> {
        Arc::new(Literal::new(ScalarValue::Boolean(Some(value))))
    }

    /// Builds an `Int32` literal expression.
    fn lit_i32(value: i32) -> Arc<dyn PhysicalExpr> {
        Arc::new(Literal::new(ScalarValue::Int32(Some(value))))
    }

    /// Builds a column expression with the given name and index.
    fn column(name: &str, index: usize) -> Arc<dyn PhysicalExpr> {
        Arc::new(Column::new(name, index))
    }

    #[test]
    fn test_physical_exprs_contains() {
        let lit_true = lit_bool(true);
        let lit_false = lit_bool(false);
        let lit4 = lit_i32(4);
        let lit2 = lit_i32(2);
        let lit1 = lit_i32(1);
        let col_a_expr = column("a", 0);
        let col_b_expr = column("b", 1);
        let col_c_expr = column("c", 2);

        // lit(true), lit(false), lit(4), lit(2), Col(a), Col(b)
        let physical_exprs = vec![
            Arc::clone(&lit_true),
            Arc::clone(&lit_false),
            Arc::clone(&lit4),
            Arc::clone(&lit2),
            Arc::clone(&col_a_expr),
            Arc::clone(&col_b_expr),
        ];

        // These expressions are inside `physical_exprs`.
        assert!(physical_exprs_contains(&physical_exprs, &lit_true));
        assert!(physical_exprs_contains(&physical_exprs, &lit2));
        assert!(physical_exprs_contains(&physical_exprs, &col_b_expr));

        // These expressions are not inside `physical_exprs`.
        assert!(!physical_exprs_contains(&physical_exprs, &col_c_expr));
        assert!(!physical_exprs_contains(&physical_exprs, &lit1));

        // An empty slice contains nothing.
        assert!(!physical_exprs_contains(&[], &lit_true));
    }

    #[test]
    fn test_physical_exprs_equal() {
        let lit_true = lit_bool(true);
        let lit_false = lit_bool(false);
        let lit1 = lit_i32(1);
        let lit2 = lit_i32(2);
        let col_b_expr = column("b", 1);

        let vec1 = vec![Arc::clone(&lit_true), Arc::clone(&lit_false)];
        let vec2 = vec![Arc::clone(&lit_true), Arc::clone(&col_b_expr)];
        let vec3 = vec![Arc::clone(&lit2), Arc::clone(&lit1)];
        let vec4 = vec![Arc::clone(&lit_true), Arc::clone(&lit_false)];

        // These vectors are the same.
        assert!(physical_exprs_equal(&vec1, &vec1));
        assert!(physical_exprs_equal(&vec1, &vec4));
        assert!(physical_exprs_bag_equal(&vec1, &vec1));
        assert!(physical_exprs_bag_equal(&vec1, &vec4));

        // These vectors are different.
        assert!(!physical_exprs_equal(&vec1, &vec2));
        assert!(!physical_exprs_equal(&vec1, &vec3));
        assert!(!physical_exprs_bag_equal(&vec1, &vec2));
        assert!(!physical_exprs_bag_equal(&vec1, &vec3));

        // Two empty slices are equal under both notions of equality.
        assert!(physical_exprs_equal(&[], &[]));
        assert!(physical_exprs_bag_equal(&[], &[]));

        // Slices of different lengths are never equal, even when one is a
        // prefix of the other.
        assert!(!physical_exprs_equal(&vec1, &vec1[..1]));
        assert!(!physical_exprs_equal(&vec1[..1], &vec1));
        assert!(!physical_exprs_bag_equal(&vec1, &vec1[..1]));
        assert!(!physical_exprs_bag_equal(&vec1[..1], &vec1));
        assert!(!physical_exprs_equal(&[], &vec1));
        assert!(!physical_exprs_bag_equal(&[], &vec1));
    }

    #[test]
    fn test_physical_exprs_set_equal() {
        // Same distinct expressions, but different multiplicities:
        // {a, a, b} versus {b, b, a}.
        let list1 = vec![column("a", 0), column("a", 0), column("b", 1)];
        let list2 = vec![column("b", 1), column("b", 1), column("a", 0)];
        assert!(!physical_exprs_bag_equal(&list1, &list2));
        assert!(!physical_exprs_bag_equal(&list2, &list1));
        assert!(!physical_exprs_equal(&list1, &list2));
        assert!(!physical_exprs_equal(&list2, &list1));

        // Same multiplicities, different order: {a, a, b, b, c}.
        let list3 = vec![
            column("a", 0),
            column("b", 1),
            column("c", 2),
            column("a", 0),
            column("b", 1),
        ];
        let list4 = vec![
            column("b", 1),
            column("b", 1),
            column("a", 0),
            column("c", 2),
            column("a", 0),
        ];
        assert!(physical_exprs_bag_equal(&list3, &list4));
        assert!(physical_exprs_bag_equal(&list4, &list3));
        assert!(physical_exprs_bag_equal(&list3, &list3));
        assert!(physical_exprs_bag_equal(&list4, &list4));
        // Ordered equality must still tell the two orderings apart.
        assert!(!physical_exprs_equal(&list3, &list4));
        assert!(!physical_exprs_equal(&list4, &list3));
    }

    #[test]
    fn test_deduplicate_physical_exprs() {
        let lit_true = &lit_bool(true);
        let lit_false = &lit_bool(false);
        let lit4 = &lit_i32(4);
        let lit2 = &lit_i32(2);
        let col_a_expr = &column("a", 0);
        let col_b_expr = &column("b", 1);

        // First vector in the tuple is the input, second one is the expected
        // result. The expected order reflects the `swap_remove`-based removal
        // performed by `deduplicate_physical_exprs`.
        let test_cases = vec![
            // ---------- TEST CASE 1 ----------//
            (
                vec![
                    lit_true, lit_false, lit4, lit2, col_a_expr, col_a_expr, col_b_expr,
                    lit_true, lit2,
                ],
                vec![lit_true, lit_false, lit4, lit2, col_a_expr, col_b_expr],
            ),
            // ---------- TEST CASE 2 ----------//
            (
                vec![lit_true, lit_true, lit_false, lit4],
                vec![lit_true, lit4, lit_false],
            ),
            // ---------- TEST CASE 3: empty input ----------//
            (vec![], vec![]),
            // ---------- TEST CASE 4: single element ----------//
            (vec![lit4], vec![lit4]),
            // ---------- TEST CASE 5: every element is a duplicate ----------//
            (vec![col_a_expr, col_a_expr, col_a_expr], vec![col_a_expr]),
        ];
        for (exprs, expected) in test_cases {
            let mut exprs = exprs.into_iter().cloned().collect::<Vec<_>>();
            let expected = expected.into_iter().cloned().collect::<Vec<_>>();
            deduplicate_physical_exprs(&mut exprs);
            assert!(physical_exprs_equal(&exprs, &expected));
        }
    }
}