Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/aggregates/group_values/null_builder.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
19
20
/// Builder for an (optional) null mask
21
///
22
/// Optimized for avoid creating the bitmask when all values are non-null
23
#[derive(Debug)]
24
pub(crate) enum MaybeNullBufferBuilder {
25
    ///  seen `row_count` rows but no nulls yet
26
    NoNulls { row_count: usize },
27
    /// have at least one null value
28
    ///
29
    /// Note this is an Arrow *VALIDITY* buffer (so it is false for nulls, true
30
    /// for non-nulls)
31
    Nulls(BooleanBufferBuilder),
32
}
33
34
impl MaybeNullBufferBuilder {
35
    /// Create a new builder
36
38
    pub fn new() -> Self {
37
38
        Self::NoNulls { row_count: 0 }
38
38
    }
39
40
    /// Return true if the row at index `row` is null
41
295k
    pub fn is_null(&self, row: usize) -> bool {
42
295k
        match self {
43
8.19k
            Self::NoNulls { .. } => false,
44
            // validity mask means a unset bit is NULL
45
286k
            Self::Nulls(builder) => !builder.get_bit(row),
46
        }
47
295k
    }
48
49
    /// Set the nullness of the next row to `is_null`
50
    ///
51
    /// num_values is the current length of the rows being tracked
52
    ///
53
    /// If `value` is true, the row is null.
54
    /// If `value` is false, the row is non null
55
403
    pub fn append(&mut self, is_null: bool) {
56
75
        match self {
57
75
            Self::NoNulls { row_count } if is_null => {
58
35
                // have seen no nulls so far, this is the  first null,
59
35
                // need to create the nulls buffer for all currently valid values
60
35
                // alloc 2x the need given we push a new but immediately
61
35
                let mut nulls = BooleanBufferBuilder::new(*row_count * 2);
62
35
                nulls.append_n(*row_count, true);
63
35
                nulls.append(false);
64
35
                *self = Self::Nulls(nulls);
65
35
            }
66
40
            Self::NoNulls { row_count } => {
67
40
                *row_count += 1;
68
40
            }
69
328
            Self::Nulls(builder) => builder.append(!is_null),
70
        }
71
403
    }
72
73
    /// return the number of heap allocated bytes used by this structure to store boolean values
74
92
    pub fn allocated_size(&self) -> usize {
75
92
        match self {
76
0
            Self::NoNulls { .. } => 0,
77
            // BooleanBufferBuilder builder::capacity returns capacity in bits (not bytes)
78
92
            Self::Nulls(builder) => builder.capacity() / 8,
79
        }
80
92
    }
81
82
    /// Return a NullBuffer representing the accumulated nulls so far
83
27
    pub fn build(self) -> Option<NullBuffer> {
84
27
        match self {
85
0
            Self::NoNulls { .. } => None,
86
27
            Self::Nulls(mut builder) => Some(NullBuffer::from(builder.finish())),
87
        }
88
27
    }
89
90
    /// Returns a NullBuffer representing the first `n` rows accumulated so far
91
    /// shifting any remaining down by `n`
92
17
    pub fn take_n(&mut self, n: usize) -> Option<NullBuffer> {
93
17
        match self {
94
0
            Self::NoNulls { row_count } => {
95
0
                *row_count -= n;
96
0
                None
97
            }
98
17
            Self::Nulls(builder) => {
99
17
                // Copy over the values at  n..len-1 values to the start of a
100
17
                // new builder and leave it in self
101
17
                //
102
17
                // TODO: it would be great to use something like `set_bits` from arrow here.
103
17
                let mut new_builder = BooleanBufferBuilder::new(builder.len());
104
61
                for i in 
n..builder.len()17
{
105
61
                    new_builder.append(builder.get_bit(i));
106
61
                }
107
17
                std::mem::swap(&mut new_builder, builder);
108
17
109
17
                // take only first n values from the original builder
110
17
                new_builder.truncate(n);
111
17
                Some(NullBuffer::from(new_builder.finish()))
112
            }
113
        }
114
17
    }
115
}