Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/functions-aggregate/src/bool_and_or.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Defines physical expressions that can be evaluated at runtime during query execution
19
20
use std::any::Any;
21
22
use arrow::array::ArrayRef;
23
use arrow::array::BooleanArray;
24
use arrow::compute::bool_and as compute_bool_and;
25
use arrow::compute::bool_or as compute_bool_or;
26
use arrow::datatypes::DataType;
27
use arrow::datatypes::Field;
28
29
use datafusion_common::internal_err;
30
use datafusion_common::{downcast_value, not_impl_err};
31
use datafusion_common::{DataFusionError, Result, ScalarValue};
32
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
33
use datafusion_expr::utils::{format_state_name, AggregateOrderSensitivity};
34
use datafusion_expr::{
35
    Accumulator, AggregateUDFImpl, GroupsAccumulator, ReversedUDAF, Signature, Volatility,
36
};
37
38
use datafusion_functions_aggregate_common::aggregate::groups_accumulator::bool_op::BooleanGroupsAccumulator;
39
40
// Downcasts `$values` to the concrete array type `$array_ty`, applies the
// reduction kernel `$op` to it, and wraps the kernel's result in the
// `ScalarValue::$variant` constructor inside `Ok(..)`.
//
// `downcast_value!` is defined in `datafusion_common`; presumably it returns
// early with an error when the downcast fails -- confirm against that macro.
macro_rules! typed_bool_and_or_batch {
    ($values:expr, $array_ty:ident, $variant:ident, $op:ident) => {{
        let typed = downcast_value!($values, $array_ty);
        Ok(ScalarValue::$variant($op(typed)))
    }};
}
48
49
// Reduces `$array` with the boolean kernel `$kernel` and yields a
// `Result<ScalarValue>` of the matching type.
//
// Only `DataType::Boolean` input is supported; for any other data type the
// *enclosing function* returns an internal error (note the `return`).
macro_rules! bool_and_or_batch {
    ($array:expr, $kernel:ident) => {{
        // Guard clause: reject everything that is not a boolean array.
        let e = $array.data_type();
        if !matches!(e, DataType::Boolean) {
            return internal_err!(
                "Bool and/Bool or is not expected to receive the type {e:?}"
            );
        }
        typed_bool_and_or_batch!($array, BooleanArray, Boolean, $kernel)
    }};
}
64
65
/// dynamically-typed bool_and(array) -> ScalarValue
66
0
fn bool_and_batch(values: &ArrayRef) -> Result<ScalarValue> {
67
0
    bool_and_or_batch!(values, compute_bool_and)
68
0
}
69
70
/// dynamically-typed bool_or(array) -> ScalarValue
71
0
fn bool_or_batch(values: &ArrayRef) -> Result<ScalarValue> {
72
0
    bool_and_or_batch!(values, compute_bool_or)
73
0
}
74
75
// Registers the `BoolAnd` aggregate through the crate-level
// `make_udaf_expr_and_func!` macro (defined outside this file).
// NOTE(review): presumably this generates the `bool_and(expression)` builder
// and the `bool_and_udaf()` accessor -- confirm against the macro definition.
make_udaf_expr_and_func!(
    BoolAnd,
    bool_and,
    expression,
    "The values to combine with `AND`",
    bool_and_udaf
);
82
83
// Registers the `BoolOr` aggregate through the crate-level
// `make_udaf_expr_and_func!` macro (defined outside this file).
// NOTE(review): presumably this generates the `bool_or(expression)` builder
// and the `bool_or_udaf()` accessor -- confirm against the macro definition.
make_udaf_expr_and_func!(
    BoolOr,
    bool_or,
    expression,
    "The values to combine with `OR`",
    bool_or_udaf
);
90
91
/// BOOL_AND aggregate expression
92
#[derive(Debug)]
93
pub struct BoolAnd {
94
    signature: Signature,
95
}
96
97
impl BoolAnd {
98
0
    fn new() -> Self {
99
0
        Self {
100
0
            signature: Signature::uniform(
101
0
                1,
102
0
                vec![DataType::Boolean],
103
0
                Volatility::Immutable,
104
0
            ),
105
0
        }
106
0
    }
107
}
108
109
impl Default for BoolAnd {
110
0
    fn default() -> Self {
111
0
        Self::new()
112
0
    }
113
}
114
115
impl AggregateUDFImpl for BoolAnd {
116
0
    fn as_any(&self) -> &dyn Any {
117
0
        self
118
0
    }
119
120
0
    fn name(&self) -> &str {
121
0
        "bool_and"
122
0
    }
123
124
0
    fn signature(&self) -> &Signature {
125
0
        &self.signature
126
0
    }
127
128
0
    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
129
0
        Ok(DataType::Boolean)
130
0
    }
131
132
0
    fn accumulator(&self, _: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
133
0
        Ok(Box::<BoolAndAccumulator>::default())
134
0
    }
135
136
0
    fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
137
0
        Ok(vec![Field::new(
138
0
            format_state_name(args.name, self.name()),
139
0
            DataType::Boolean,
140
0
            true,
141
0
        )])
142
0
    }
143
144
0
    fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
145
0
        true
146
0
    }
147
148
0
    fn create_groups_accumulator(
149
0
        &self,
150
0
        args: AccumulatorArgs,
151
0
    ) -> Result<Box<dyn GroupsAccumulator>> {
152
0
        match args.return_type {
153
            DataType::Boolean => {
154
0
                Ok(Box::new(BooleanGroupsAccumulator::new(|x, y| x && y, true)))
155
            }
156
0
            _ => not_impl_err!(
157
0
                "GroupsAccumulator not supported for {} with {}",
158
0
                args.name,
159
0
                args.return_type
160
0
            ),
161
        }
162
0
    }
163
164
0
    fn aliases(&self) -> &[String] {
165
0
        &[]
166
0
    }
167
168
0
    fn order_sensitivity(&self) -> AggregateOrderSensitivity {
169
0
        AggregateOrderSensitivity::Insensitive
170
0
    }
171
172
0
    fn reverse_expr(&self) -> ReversedUDAF {
173
0
        ReversedUDAF::Identical
174
0
    }
175
}
176
177
/// Row-based accumulator state for `bool_and`.
#[derive(Default, Debug)]
struct BoolAndAccumulator {
    /// Running AND of the values folded in so far; `None` until a batch
    /// contributes a non-`None` result.
    acc: Option<bool>,
}
181
182
impl Accumulator for BoolAndAccumulator {
183
0
    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
184
0
        let values = &values[0];
185
0
        self.acc = match (self.acc, bool_and_batch(values)?) {
186
0
            (None, ScalarValue::Boolean(v)) => v,
187
0
            (Some(v), ScalarValue::Boolean(None)) => Some(v),
188
0
            (Some(a), ScalarValue::Boolean(Some(b))) => Some(a && b),
189
0
            _ => unreachable!(),
190
        };
191
0
        Ok(())
192
0
    }
193
194
0
    fn evaluate(&mut self) -> Result<ScalarValue> {
195
0
        Ok(ScalarValue::Boolean(self.acc))
196
0
    }
197
198
0
    fn size(&self) -> usize {
199
0
        std::mem::size_of_val(self)
200
0
    }
201
202
0
    fn state(&mut self) -> Result<Vec<ScalarValue>> {
203
0
        Ok(vec![ScalarValue::Boolean(self.acc)])
204
0
    }
205
206
0
    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
207
0
        self.update_batch(states)
208
0
    }
209
}
210
211
/// BOOL_OR aggregate expression
212
#[derive(Debug, Clone)]
213
pub struct BoolOr {
214
    signature: Signature,
215
}
216
217
impl BoolOr {
218
0
    fn new() -> Self {
219
0
        Self {
220
0
            signature: Signature::uniform(
221
0
                1,
222
0
                vec![DataType::Boolean],
223
0
                Volatility::Immutable,
224
0
            ),
225
0
        }
226
0
    }
227
}
228
229
impl Default for BoolOr {
230
0
    fn default() -> Self {
231
0
        Self::new()
232
0
    }
233
}
234
235
impl AggregateUDFImpl for BoolOr {
236
0
    fn as_any(&self) -> &dyn Any {
237
0
        self
238
0
    }
239
240
0
    fn name(&self) -> &str {
241
0
        "bool_or"
242
0
    }
243
244
0
    fn signature(&self) -> &Signature {
245
0
        &self.signature
246
0
    }
247
248
0
    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
249
0
        Ok(DataType::Boolean)
250
0
    }
251
252
0
    fn accumulator(&self, _: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
253
0
        Ok(Box::<BoolOrAccumulator>::default())
254
0
    }
255
256
0
    fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
257
0
        Ok(vec![Field::new(
258
0
            format_state_name(args.name, self.name()),
259
0
            DataType::Boolean,
260
0
            true,
261
0
        )])
262
0
    }
263
264
0
    fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
265
0
        true
266
0
    }
267
268
0
    fn create_groups_accumulator(
269
0
        &self,
270
0
        args: AccumulatorArgs,
271
0
    ) -> Result<Box<dyn GroupsAccumulator>> {
272
0
        match args.return_type {
273
0
            DataType::Boolean => Ok(Box::new(BooleanGroupsAccumulator::new(
274
0
                |x, y| x || y,
275
0
                false,
276
0
            ))),
277
0
            _ => not_impl_err!(
278
0
                "GroupsAccumulator not supported for {} with {}",
279
0
                args.name,
280
0
                args.return_type
281
0
            ),
282
        }
283
0
    }
284
285
0
    fn aliases(&self) -> &[String] {
286
0
        &[]
287
0
    }
288
289
0
    fn order_sensitivity(&self) -> AggregateOrderSensitivity {
290
0
        AggregateOrderSensitivity::Insensitive
291
0
    }
292
293
0
    fn reverse_expr(&self) -> ReversedUDAF {
294
0
        ReversedUDAF::Identical
295
0
    }
296
}
297
298
/// Row-based accumulator state for `bool_or`.
#[derive(Default, Debug)]
struct BoolOrAccumulator {
    /// Running OR of the values folded in so far; `None` until a batch
    /// contributes a non-`None` result.
    acc: Option<bool>,
}
302
303
impl Accumulator for BoolOrAccumulator {
304
0
    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
305
0
        let values = &values[0];
306
0
        self.acc = match (self.acc, bool_or_batch(values)?) {
307
0
            (None, ScalarValue::Boolean(v)) => v,
308
0
            (Some(v), ScalarValue::Boolean(None)) => Some(v),
309
0
            (Some(a), ScalarValue::Boolean(Some(b))) => Some(a || b),
310
0
            _ => unreachable!(),
311
        };
312
0
        Ok(())
313
0
    }
314
315
0
    fn evaluate(&mut self) -> Result<ScalarValue> {
316
0
        Ok(ScalarValue::Boolean(self.acc))
317
0
    }
318
319
0
    fn size(&self) -> usize {
320
0
        std::mem::size_of_val(self)
321
0
    }
322
323
0
    fn state(&mut self) -> Result<Vec<ScalarValue>> {
324
0
        Ok(vec![ScalarValue::Boolean(self.acc)])
325
0
    }
326
327
0
    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
328
0
        self.update_batch(states)
329
0
    }
330
}