Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-expr/src/physical_expr.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::sync::Arc;
19
20
pub(crate) use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
21
use itertools::izip;
22
23
pub use datafusion_physical_expr_common::physical_expr::down_cast_any_ref;
24
25
/// Shared [`PhysicalExpr`].
26
pub type PhysicalExprRef = Arc<dyn PhysicalExpr>;
27
28
/// This function is similar to the `contains` method of `Vec`. It finds
29
/// whether `expr` is among `physical_exprs`.
30
4.14k
pub fn physical_exprs_contains(
31
4.14k
    physical_exprs: &[Arc<dyn PhysicalExpr>],
32
4.14k
    expr: &Arc<dyn PhysicalExpr>,
33
4.14k
) -> bool {
34
4.14k
    physical_exprs
35
4.14k
        .iter()
36
4.14k
        .any(|physical_expr| 
physical_expr.eq(expr)4.08k
)
37
4.14k
}
38
39
/// Checks whether the given physical expression slices are equal.
40
0
pub fn physical_exprs_equal(
41
0
    lhs: &[Arc<dyn PhysicalExpr>],
42
0
    rhs: &[Arc<dyn PhysicalExpr>],
43
0
) -> bool {
44
0
    lhs.len() == rhs.len() && izip!(lhs, rhs).all(|(lhs, rhs)| lhs.eq(rhs))
45
0
}
46
47
/// Checks whether the given physical expression slices are equal in the sense
48
/// of bags (multi-sets), disregarding their orderings.
49
0
pub fn physical_exprs_bag_equal(
50
0
    lhs: &[Arc<dyn PhysicalExpr>],
51
0
    rhs: &[Arc<dyn PhysicalExpr>],
52
0
) -> bool {
53
0
    // TODO: Once we can use `HashMap`s with `Arc<dyn PhysicalExpr>`, this
54
0
    //       function should use a `HashMap` to reduce computational complexity.
55
0
    if lhs.len() == rhs.len() {
56
0
        let mut rhs_vec = rhs.to_vec();
57
0
        for expr in lhs {
58
0
            if let Some(idx) = rhs_vec.iter().position(|e| expr.eq(e)) {
59
0
                rhs_vec.swap_remove(idx);
60
0
            } else {
61
0
                return false;
62
            }
63
        }
64
0
        true
65
    } else {
66
0
        false
67
    }
68
0
}
69
70
/// This utility function removes duplicates from the given `exprs` vector.
71
/// Note that this function does not necessarily preserve its input ordering.
72
183
pub fn deduplicate_physical_exprs(exprs: &mut Vec<Arc<dyn PhysicalExpr>>) {
73
183
    // TODO: Once we can use `HashSet`s with `Arc<dyn PhysicalExpr>`, this
74
183
    //       function should use a `HashSet` to reduce computational complexity.
75
183
    // See issue: https://github.com/apache/datafusion/issues/8027
76
183
    let mut idx = 0;
77
549
    while idx < exprs.len() {
78
366
        let mut rest_idx = idx + 1;
79
549
        while rest_idx < exprs.len() {
80
183
            if exprs[idx].eq(&exprs[rest_idx]) {
81
0
                exprs.swap_remove(rest_idx);
82
183
            } else {
83
183
                rest_idx += 1;
84
183
            }
85
        }
86
366
        idx += 1;
87
    }
88
183
}
89
90
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use crate::expressions::{Column, Literal};
    use crate::physical_expr::{
        deduplicate_physical_exprs, physical_exprs_bag_equal, physical_exprs_contains,
        physical_exprs_equal, PhysicalExpr,
    };

    use datafusion_common::ScalarValue;

    /// Builds a boolean literal expression.
    fn lit_bool(value: bool) -> Arc<dyn PhysicalExpr> {
        Arc::new(Literal::new(ScalarValue::Boolean(Some(value))))
    }

    /// Builds an `Int32` literal expression.
    fn lit_i32(value: i32) -> Arc<dyn PhysicalExpr> {
        Arc::new(Literal::new(ScalarValue::Int32(Some(value))))
    }

    /// Builds a column expression with the given name and index.
    fn col(name: &str, index: usize) -> Arc<dyn PhysicalExpr> {
        Arc::new(Column::new(name, index))
    }

    #[test]
    fn test_physical_exprs_contains() {
        let lit_true = lit_bool(true);
        let lit_false = lit_bool(false);
        let lit4 = lit_i32(4);
        let lit2 = lit_i32(2);
        let lit1 = lit_i32(1);
        let col_a_expr = col("a", 0);
        let col_b_expr = col("b", 1);
        let col_c_expr = col("c", 2);

        // lit(true), lit(false), lit(4), lit(2), Col(a), Col(b)
        let physical_exprs: Vec<Arc<dyn PhysicalExpr>> = vec![
            Arc::clone(&lit_true),
            Arc::clone(&lit_false),
            Arc::clone(&lit4),
            Arc::clone(&lit2),
            Arc::clone(&col_a_expr),
            Arc::clone(&col_b_expr),
        ];

        // Expressions that are members of `physical_exprs`.
        assert!(physical_exprs_contains(&physical_exprs, &lit_true));
        assert!(physical_exprs_contains(&physical_exprs, &lit2));
        assert!(physical_exprs_contains(&physical_exprs, &col_b_expr));

        // Expressions that are not members of `physical_exprs`.
        assert!(!physical_exprs_contains(&physical_exprs, &col_c_expr));
        assert!(!physical_exprs_contains(&physical_exprs, &lit1));
    }

    #[test]
    fn test_physical_exprs_equal() {
        let lit_true = lit_bool(true);
        let lit_false = lit_bool(false);
        let lit1 = lit_i32(1);
        let lit2 = lit_i32(2);
        let col_b_expr = col("b", 1);

        let vec1 = vec![Arc::clone(&lit_true), Arc::clone(&lit_false)];
        let vec2 = vec![Arc::clone(&lit_true), Arc::clone(&col_b_expr)];
        let vec3 = vec![Arc::clone(&lit2), Arc::clone(&lit1)];
        let vec4 = vec![Arc::clone(&lit_true), Arc::clone(&lit_false)];

        // Identical contents, both ordered and as bags.
        assert!(physical_exprs_equal(&vec1, &vec1));
        assert!(physical_exprs_equal(&vec1, &vec4));
        assert!(physical_exprs_bag_equal(&vec1, &vec1));
        assert!(physical_exprs_bag_equal(&vec1, &vec4));

        // Different contents, both ordered and as bags.
        assert!(!physical_exprs_equal(&vec1, &vec2));
        assert!(!physical_exprs_equal(&vec1, &vec3));
        assert!(!physical_exprs_bag_equal(&vec1, &vec2));
        assert!(!physical_exprs_bag_equal(&vec1, &vec3));
    }

    #[test]
    fn test_physical_exprs_set_equal() {
        // Same distinct members, but different multiplicities: not bag-equal.
        let list1: Vec<Arc<dyn PhysicalExpr>> = vec![col("a", 0), col("a", 0), col("b", 1)];
        let list2: Vec<Arc<dyn PhysicalExpr>> = vec![col("b", 1), col("b", 1), col("a", 0)];
        assert!(!physical_exprs_bag_equal(
            list1.as_slice(),
            list2.as_slice()
        ));
        assert!(!physical_exprs_bag_equal(
            list2.as_slice(),
            list1.as_slice()
        ));
        assert!(!physical_exprs_equal(list1.as_slice(), list2.as_slice()));
        assert!(!physical_exprs_equal(list2.as_slice(), list1.as_slice()));

        // Same members with the same multiplicities in a different order:
        // bag-equal, but not equal as ordered slices.
        let list3: Vec<Arc<dyn PhysicalExpr>> = vec![
            col("a", 0),
            col("b", 1),
            col("c", 2),
            col("a", 0),
            col("b", 1),
        ];
        let list4: Vec<Arc<dyn PhysicalExpr>> = vec![
            col("b", 1),
            col("b", 1),
            col("a", 0),
            col("c", 2),
            col("a", 0),
        ];
        assert!(physical_exprs_bag_equal(list3.as_slice(), list4.as_slice()));
        assert!(physical_exprs_bag_equal(list4.as_slice(), list3.as_slice()));
        assert!(physical_exprs_bag_equal(list3.as_slice(), list3.as_slice()));
        assert!(physical_exprs_bag_equal(list4.as_slice(), list4.as_slice()));
        assert!(!physical_exprs_equal(list3.as_slice(), list4.as_slice()));
        assert!(!physical_exprs_equal(list4.as_slice(), list3.as_slice()));
        assert!(physical_exprs_bag_equal(list3.as_slice(), list3.as_slice()));
        assert!(physical_exprs_bag_equal(list4.as_slice(), list4.as_slice()));
    }

    #[test]
    fn test_deduplicate_physical_exprs() {
        let lit_true = lit_bool(true);
        let lit_false = lit_bool(false);
        let lit4 = lit_i32(4);
        let lit2 = lit_i32(2);
        let col_a_expr = col("a", 0);
        let col_b_expr = col("b", 1);

        // Each case pairs the input vector with the expected result; the
        // expected ordering is the one produced by the deduplication itself.
        let test_cases: Vec<(Vec<Arc<dyn PhysicalExpr>>, Vec<Arc<dyn PhysicalExpr>>)> = vec![
            // ---------- TEST CASE 1----------//
            (
                vec![
                    Arc::clone(&lit_true),
                    Arc::clone(&lit_false),
                    Arc::clone(&lit4),
                    Arc::clone(&lit2),
                    Arc::clone(&col_a_expr),
                    Arc::clone(&col_a_expr),
                    Arc::clone(&col_b_expr),
                    Arc::clone(&lit_true),
                    Arc::clone(&lit2),
                ],
                vec![
                    Arc::clone(&lit_true),
                    Arc::clone(&lit_false),
                    Arc::clone(&lit4),
                    Arc::clone(&lit2),
                    Arc::clone(&col_a_expr),
                    Arc::clone(&col_b_expr),
                ],
            ),
            // ---------- TEST CASE 2----------//
            (
                vec![
                    Arc::clone(&lit_true),
                    Arc::clone(&lit_true),
                    Arc::clone(&lit_false),
                    Arc::clone(&lit4),
                ],
                vec![
                    Arc::clone(&lit_true),
                    Arc::clone(&lit4),
                    Arc::clone(&lit_false),
                ],
            ),
        ];

        for (mut exprs, expected) in test_cases {
            deduplicate_physical_exprs(&mut exprs);
            assert!(physical_exprs_equal(&exprs, &expected));
        }
    }
}