/Users/andrewlamb/Software/datafusion/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/bool_op.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use std::sync::Arc; |
19 | | |
20 | | use crate::aggregate::groups_accumulator::nulls::filtered_null_mask; |
21 | | use arrow::array::{ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder}; |
22 | | use arrow::buffer::BooleanBuffer; |
23 | | use datafusion_common::Result; |
24 | | use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator}; |
25 | | |
26 | | use super::accumulate::NullState; |
27 | | |
28 | | /// An accumulator that implements a single operation over a |
29 | | /// [`BooleanArray`] where the accumulated state is also boolean (such |
30 | | /// as [`BitAndAssign`]) |
31 | | /// |
32 | | /// F: The function to apply to two elements. The first argument is |
33 | | /// the existing value and should be updated with the second value |
34 | | /// (e.g. [`BitAndAssign`] style). |
35 | | /// |
36 | | /// [`BitAndAssign`]: std::ops::BitAndAssign |
37 | | #[derive(Debug)] |
38 | | pub struct BooleanGroupsAccumulator<F> |
39 | | where |
40 | | F: Fn(bool, bool) -> bool + Send + Sync, |
41 | | { |
42 | | /// values per group |
43 | | values: BooleanBufferBuilder, |
44 | | |
45 | | /// Track nulls in the input / filters |
46 | | null_state: NullState, |
47 | | |
48 | | /// Function that computes the output |
49 | | bool_fn: F, |
50 | | |
51 | | /// The identity element for the boolean operation. |
52 | | /// Any value combined with this returns the original value. |
53 | | identity: bool, |
54 | | } |
55 | | |
56 | | impl<F> BooleanGroupsAccumulator<F> |
57 | | where |
58 | | F: Fn(bool, bool) -> bool + Send + Sync, |
59 | | { |
60 | 0 | pub fn new(bool_fn: F, identity: bool) -> Self { |
61 | 0 | Self { |
62 | 0 | values: BooleanBufferBuilder::new(0), |
63 | 0 | null_state: NullState::new(), |
64 | 0 | bool_fn, |
65 | 0 | identity, |
66 | 0 | } |
67 | 0 | } |
68 | | } |
69 | | |
70 | | impl<F> GroupsAccumulator for BooleanGroupsAccumulator<F> |
71 | | where |
72 | | F: Fn(bool, bool) -> bool + Send + Sync, |
73 | | { |
74 | 0 | fn update_batch( |
75 | 0 | &mut self, |
76 | 0 | values: &[ArrayRef], |
77 | 0 | group_indices: &[usize], |
78 | 0 | opt_filter: Option<&BooleanArray>, |
79 | 0 | total_num_groups: usize, |
80 | 0 | ) -> Result<()> { |
81 | 0 | assert_eq!(values.len(), 1, "single argument to update_batch"); |
82 | 0 | let values = values[0].as_boolean(); |
83 | 0 |
|
84 | 0 | if self.values.len() < total_num_groups { |
85 | 0 | let new_groups = total_num_groups - self.values.len(); |
86 | 0 | // Fill with the identity element, so that when the first non-null value is encountered, |
87 | 0 | // it will combine with the identity and the result will be the first non-null value itself. |
88 | 0 | self.values.append_n(new_groups, self.identity); |
89 | 0 | } |
90 | | |
91 | | // NullState dispatches / handles tracking nulls and groups that saw no values |
92 | 0 | self.null_state.accumulate_boolean( |
93 | 0 | group_indices, |
94 | 0 | values, |
95 | 0 | opt_filter, |
96 | 0 | total_num_groups, |
97 | 0 | |group_index, new_value| { |
98 | 0 | let current_value = self.values.get_bit(group_index); |
99 | 0 | let value = (self.bool_fn)(current_value, new_value); |
100 | 0 | self.values.set_bit(group_index, value); |
101 | 0 | }, |
102 | 0 | ); |
103 | 0 |
|
104 | 0 | Ok(()) |
105 | 0 | } |
106 | | |
107 | 0 | fn evaluate(&mut self, emit_to: EmitTo) -> Result<ArrayRef> { |
108 | 0 | let values = self.values.finish(); |
109 | | |
110 | 0 | let values = match emit_to { |
111 | 0 | EmitTo::All => values, |
112 | 0 | EmitTo::First(n) => { |
113 | 0 | let first_n: BooleanBuffer = values.iter().take(n).collect(); |
114 | | // put n+1 back into self.values |
115 | 0 | for v in values.iter().skip(n) { |
116 | 0 | self.values.append(v); |
117 | 0 | } |
118 | 0 | first_n |
119 | | } |
120 | | }; |
121 | | |
122 | 0 | let nulls = self.null_state.build(emit_to); |
123 | 0 | let values = BooleanArray::new(values, Some(nulls)); |
124 | 0 | Ok(Arc::new(values)) |
125 | 0 | } |
126 | | |
127 | 0 | fn state(&mut self, emit_to: EmitTo) -> Result<Vec<ArrayRef>> { |
128 | 0 | self.evaluate(emit_to).map(|arr| vec![arr]) |
129 | 0 | } |
130 | | |
131 | 0 | fn merge_batch( |
132 | 0 | &mut self, |
133 | 0 | values: &[ArrayRef], |
134 | 0 | group_indices: &[usize], |
135 | 0 | opt_filter: Option<&BooleanArray>, |
136 | 0 | total_num_groups: usize, |
137 | 0 | ) -> Result<()> { |
138 | 0 | // update / merge are the same |
139 | 0 | self.update_batch(values, group_indices, opt_filter, total_num_groups) |
140 | 0 | } |
141 | | |
142 | 0 | fn size(&self) -> usize { |
143 | 0 | // capacity is in bits, so convert to bytes |
144 | 0 | self.values.capacity() / 8 + self.null_state.size() |
145 | 0 | } |
146 | | |
147 | 0 | fn convert_to_state( |
148 | 0 | &self, |
149 | 0 | values: &[ArrayRef], |
150 | 0 | opt_filter: Option<&BooleanArray>, |
151 | 0 | ) -> Result<Vec<ArrayRef>> { |
152 | 0 | let values = values[0].as_boolean().clone(); |
153 | 0 |
|
154 | 0 | let values_null_buffer_filtered = filtered_null_mask(opt_filter, &values); |
155 | 0 | let (values_buf, _) = values.into_parts(); |
156 | 0 | let values_filtered = BooleanArray::new(values_buf, values_null_buffer_filtered); |
157 | 0 |
|
158 | 0 | Ok(vec![Arc::new(values_filtered)]) |
159 | 0 | } |
160 | | |
161 | 0 | fn supports_convert_to_state(&self) -> bool { |
162 | 0 | true |
163 | 0 | } |
164 | | } |