/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/aggregates/group_values/null_builder.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use arrow_buffer::{BooleanBufferBuilder, NullBuffer}; |
19 | | |
20 | | /// Builder for an (optional) null mask |
21 | | /// |
22 | | /// Optimized for avoid creating the bitmask when all values are non-null |
23 | | #[derive(Debug)] |
24 | | pub(crate) enum MaybeNullBufferBuilder { |
25 | | /// seen `row_count` rows but no nulls yet |
26 | | NoNulls { row_count: usize }, |
27 | | /// have at least one null value |
28 | | /// |
29 | | /// Note this is an Arrow *VALIDITY* buffer (so it is false for nulls, true |
30 | | /// for non-nulls) |
31 | | Nulls(BooleanBufferBuilder), |
32 | | } |
33 | | |
34 | | impl MaybeNullBufferBuilder { |
35 | | /// Create a new builder |
36 | 38 | pub fn new() -> Self { |
37 | 38 | Self::NoNulls { row_count: 0 } |
38 | 38 | } |
39 | | |
40 | | /// Return true if the row at index `row` is null |
41 | 295k | pub fn is_null(&self, row: usize) -> bool { |
42 | 295k | match self { |
43 | 8.19k | Self::NoNulls { .. } => false, |
44 | | // validity mask means a unset bit is NULL |
45 | 286k | Self::Nulls(builder) => !builder.get_bit(row), |
46 | | } |
47 | 295k | } |
48 | | |
49 | | /// Set the nullness of the next row to `is_null` |
50 | | /// |
51 | | /// num_values is the current length of the rows being tracked |
52 | | /// |
53 | | /// If `value` is true, the row is null. |
54 | | /// If `value` is false, the row is non null |
55 | 403 | pub fn append(&mut self, is_null: bool) { |
56 | 75 | match self { |
57 | 75 | Self::NoNulls { row_count } if is_null => { |
58 | 35 | // have seen no nulls so far, this is the first null, |
59 | 35 | // need to create the nulls buffer for all currently valid values |
60 | 35 | // alloc 2x the need given we push a new but immediately |
61 | 35 | let mut nulls = BooleanBufferBuilder::new(*row_count * 2); |
62 | 35 | nulls.append_n(*row_count, true); |
63 | 35 | nulls.append(false); |
64 | 35 | *self = Self::Nulls(nulls); |
65 | 35 | } |
66 | 40 | Self::NoNulls { row_count } => { |
67 | 40 | *row_count += 1; |
68 | 40 | } |
69 | 328 | Self::Nulls(builder) => builder.append(!is_null), |
70 | | } |
71 | 403 | } |
72 | | |
73 | | /// return the number of heap allocated bytes used by this structure to store boolean values |
74 | 92 | pub fn allocated_size(&self) -> usize { |
75 | 92 | match self { |
76 | 0 | Self::NoNulls { .. } => 0, |
77 | | // BooleanBufferBuilder builder::capacity returns capacity in bits (not bytes) |
78 | 92 | Self::Nulls(builder) => builder.capacity() / 8, |
79 | | } |
80 | 92 | } |
81 | | |
82 | | /// Return a NullBuffer representing the accumulated nulls so far |
83 | 27 | pub fn build(self) -> Option<NullBuffer> { |
84 | 27 | match self { |
85 | 0 | Self::NoNulls { .. } => None, |
86 | 27 | Self::Nulls(mut builder) => Some(NullBuffer::from(builder.finish())), |
87 | | } |
88 | 27 | } |
89 | | |
90 | | /// Returns a NullBuffer representing the first `n` rows accumulated so far |
91 | | /// shifting any remaining down by `n` |
92 | 17 | pub fn take_n(&mut self, n: usize) -> Option<NullBuffer> { |
93 | 17 | match self { |
94 | 0 | Self::NoNulls { row_count } => { |
95 | 0 | *row_count -= n; |
96 | 0 | None |
97 | | } |
98 | 17 | Self::Nulls(builder) => { |
99 | 17 | // Copy over the values at n..len-1 values to the start of a |
100 | 17 | // new builder and leave it in self |
101 | 17 | // |
102 | 17 | // TODO: it would be great to use something like `set_bits` from arrow here. |
103 | 17 | let mut new_builder = BooleanBufferBuilder::new(builder.len()); |
104 | 61 | for i in n..builder.len()17 { |
105 | 61 | new_builder.append(builder.get_bit(i)); |
106 | 61 | } |
107 | 17 | std::mem::swap(&mut new_builder, builder); |
108 | 17 | |
109 | 17 | // take only first n values from the original builder |
110 | 17 | new_builder.truncate(n); |
111 | 17 | Some(NullBuffer::from(new_builder.finish())) |
112 | | } |
113 | | } |
114 | 17 | } |
115 | | } |