Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-expr-common/src/utils.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::sync::Arc;
19
20
use arrow::array::{make_array, Array, ArrayRef, BooleanArray, MutableArrayData};
21
use arrow::compute::{and_kleene, is_not_null, SlicesIterator};
22
23
use datafusion_common::Result;
24
use datafusion_expr_common::sort_properties::ExprProperties;
25
26
use crate::physical_expr::PhysicalExpr;
27
use crate::sort_expr::PhysicalSortExpr;
28
use crate::tree_node::ExprContext;
29
30
/// Represents a [`PhysicalExpr`] node with associated properties (order and
31
/// range) in a context where properties are tracked.
32
pub type ExprPropertiesNode = ExprContext<ExprProperties>;
33
34
impl ExprPropertiesNode {
35
    /// Constructs a new `ExprPropertiesNode` with unknown properties for a
36
    /// given physical expression. This node initializes with default properties
37
    /// and recursively applies this to all child expressions.
38
467
    pub fn new_unknown(expr: Arc<dyn PhysicalExpr>) -> Self {
39
467
        let children = expr
40
467
            .children()
41
467
            .into_iter()
42
467
            .cloned()
43
467
            .map(Self::new_unknown)
44
467
            .collect();
45
467
        Self {
46
467
            expr,
47
467
            data: ExprProperties::new_unknown(),
48
467
            children,
49
467
        }
50
467
    }
51
}
52
53
/// Scatter `truthy` array by boolean mask. When the mask evaluates `true`, next values of `truthy`
54
/// are taken, when the mask evaluates `false` values null values are filled.
55
///
56
/// # Arguments
57
/// * `mask` - Boolean values used to determine where to put the `truthy` values
58
/// * `truthy` - All values of this array are to scatter according to `mask` into final result.
59
0
pub fn scatter(mask: &BooleanArray, truthy: &dyn Array) -> Result<ArrayRef> {
60
0
    let truthy = truthy.to_data();
61
62
    // update the mask so that any null values become false
63
    // (SlicesIterator doesn't respect nulls)
64
0
    let mask = and_kleene(mask, &is_not_null(mask)?)?;
65
66
0
    let mut mutable = MutableArrayData::new(vec![&truthy], true, mask.len());
67
0
68
0
    // the SlicesIterator slices only the true values. So the gaps left by this iterator we need to
69
0
    // fill with falsy values
70
0
71
0
    // keep track of how much is filled
72
0
    let mut filled = 0;
73
0
    // keep track of current position we have in truthy array
74
0
    let mut true_pos = 0;
75
0
76
0
    SlicesIterator::new(&mask).for_each(|(start, end)| {
77
0
        // the gap needs to be filled with nulls
78
0
        if start > filled {
79
0
            mutable.extend_nulls(start - filled);
80
0
        }
81
        // fill with truthy values
82
0
        let len = end - start;
83
0
        mutable.extend(0, true_pos, true_pos + len);
84
0
        true_pos += len;
85
0
        filled = end;
86
0
    });
87
0
    // the remaining part is falsy
88
0
    if filled < mask.len() {
89
0
        mutable.extend_nulls(mask.len() - filled);
90
0
    }
91
92
0
    let data = mutable.freeze();
93
0
    Ok(make_array(data))
94
0
}
95
96
/// Reverses the ORDER BY expression, which is useful during equivalent window
97
/// expression construction. For instance, 'ORDER BY a ASC, NULLS LAST' turns into
98
/// 'ORDER BY a DESC, NULLS FIRST'.
99
78
pub fn reverse_order_bys(order_bys: &[PhysicalSortExpr]) -> Vec<PhysicalSortExpr> {
100
78
    order_bys
101
78
        .iter()
102
120
        .map(|e| PhysicalSortExpr::new(e.expr.clone(), !e.options))
103
78
        .collect()
104
78
}
105
106
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::Int32Array;

    use datafusion_common::cast::{as_boolean_array, as_int32_array};

    use super::*;

    /// Values are placed at the `true` slots; the result is as long as the mask.
    #[test]
    fn scatter_int() -> Result<()> {
        let values = Arc::new(Int32Array::from(vec![1, 10, 11, 100]));
        let mask = BooleanArray::from(vec![true, true, false, false, true]);

        // one output slot per mask entry
        let expected = Int32Array::from(vec![Some(1), Some(10), None, None, Some(11)]);

        let scattered = scatter(&mask, values.as_ref())?;
        let scattered = as_int32_array(&scattered)?;

        assert_eq!(&expected, scattered);
        Ok(())
    }

    /// A mask that ends in `false` entries yields trailing nulls.
    #[test]
    fn scatter_int_end_with_false() -> Result<()> {
        let values = Arc::new(Int32Array::from(vec![1, 10, 11, 100]));
        let mask = BooleanArray::from(vec![true, false, true, false, false, false]);

        // the output must still match the mask length
        let expected =
            Int32Array::from(vec![Some(1), None, Some(10), None, None, None]);

        let scattered = scatter(&mask, values.as_ref())?;
        let scattered = as_int32_array(&scattered)?;

        assert_eq!(&expected, scattered);
        Ok(())
    }

    /// Null mask entries behave exactly like `false` entries.
    #[test]
    fn scatter_with_null_mask() -> Result<()> {
        let values = Arc::new(Int32Array::from(vec![1, 10, 11]));
        let mask = BooleanArray::from(vec![
            Some(false),
            None,
            Some(true),
            Some(true),
            None,
        ]);

        // nulls in the mask produce nulls in the output
        let expected = Int32Array::from(vec![None, None, Some(1), Some(10), None]);

        let scattered = scatter(&mask, values.as_ref())?;
        let scattered = as_int32_array(&scattered)?;

        assert_eq!(&expected, scattered);
        Ok(())
    }

    /// Scattering also works when the values themselves are booleans.
    #[test]
    fn scatter_boolean() -> Result<()> {
        let values = Arc::new(BooleanArray::from(vec![false, false, false, true]));
        let mask = BooleanArray::from(vec![true, true, false, false, true]);

        // one output slot per mask entry
        let expected = BooleanArray::from(vec![
            Some(false),
            Some(false),
            None,
            None,
            Some(false),
        ]);

        let scattered = scatter(&mask, values.as_ref())?;
        let scattered = as_boolean_array(&scattered)?;

        assert_eq!(&expected, scattered);
        Ok(())
    }
}