Skip to content

Commit

Permalink
Add BitIterator (#3856)
Browse files Browse the repository at this point in the history
* Add BitIterator

* Review feedback
  • Loading branch information
tustvold authored Mar 17, 2023
1 parent eacb135 commit b9090ae
Showing 1 changed file with 100 additions and 2 deletions.
102 changes: 100 additions & 2 deletions arrow-data/src/bit_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,72 @@
// under the License.

use arrow_buffer::bit_chunk_iterator::{UnalignedBitChunk, UnalignedBitChunkIterator};
use arrow_buffer::bit_util::{ceil, get_bit_raw};
use std::result::Result;

/// Iterator over the bits within a packed bitmask
///
/// To efficiently iterate over just the set bits see [`BitIndexIterator`] and [`BitSliceIterator`]
pub struct BitIterator<'a> {
buffer: &'a [u8],
current_offset: usize,
end_offset: usize,
}

impl<'a> BitIterator<'a> {
/// Create a new [`BitIterator`] from the provided `buffer`,
/// and `offset` and `len` in bits
///
/// # Panic
///
/// Panics if `buffer` is too short for the provided offset and length
pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self {
let end_offset = offset.checked_add(len).unwrap();
let required_len = ceil(end_offset, 8);
assert!(
buffer.len() >= required_len,
"BitIterator buffer too small, expected {required_len} got {}",
buffer.len()
);

Self {
buffer,
current_offset: offset,
end_offset,
}
}
}

impl<'a> Iterator for BitIterator<'a> {
type Item = bool;

fn next(&mut self) -> Option<Self::Item> {
if self.current_offset == self.end_offset {
return None;
}
// Safety:
// offsets in bounds
let v = unsafe { get_bit_raw(self.buffer.as_ptr(), self.current_offset) };
self.current_offset += 1;
Some(v)
}
}

impl<'a> ExactSizeIterator for BitIterator<'a> {}

impl<'a> DoubleEndedIterator for BitIterator<'a> {
fn next_back(&mut self) -> Option<Self::Item> {
if self.current_offset == self.end_offset {
return None;
}
self.end_offset -= 1;
// Safety:
// offsets in bounds
let v = unsafe { get_bit_raw(self.buffer.as_ptr(), self.end_offset) };
Some(v)
}
}

/// Iterator of contiguous ranges of set bits within a provided packed bitmask
///
/// Returns `(usize, usize)` each representing an interval where the corresponding
Expand All @@ -32,7 +96,7 @@ pub struct BitSliceIterator<'a> {
}

impl<'a> BitSliceIterator<'a> {
/// Create a new [`BitSliceIterator`] from the provide `buffer`,
/// Create a new [`BitSliceIterator`] from the provided `buffer`,
/// and `offset` and `len` in bits
pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self {
let chunk = UnalignedBitChunk::new(buffer, offset, len);
Expand Down Expand Up @@ -192,4 +256,38 @@ pub fn try_for_each_valid_idx<E, F: FnMut(usize) -> Result<(), E>>(
}
}

// Note: tests located in filter module
// Note: further tests located in arrow_select::filter module

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_bit_iterator() {
let mask = &[0b00010010, 0b00100011, 0b00000101, 0b00010001, 0b10010011];
let actual: Vec<_> = BitIterator::new(mask, 0, 5).collect();
assert_eq!(actual, &[false, true, false, false, true]);

let actual: Vec<_> = BitIterator::new(mask, 4, 5).collect();
assert_eq!(actual, &[true, false, false, false, true]);

let actual: Vec<_> = BitIterator::new(mask, 12, 14).collect();
assert_eq!(
actual,
&[
false, true, false, false, true, false, true, false, false, false, false,
false, true, false
]
);

assert_eq!(BitIterator::new(mask, 0, 0).count(), 0);
assert_eq!(BitIterator::new(mask, 40, 0).count(), 0);
}

#[test]
#[should_panic(expected = "BitIterator buffer too small, expected 3 got 2")]
fn test_bit_iterator_bounds() {
let mask = &[223, 23];
BitIterator::new(mask, 17, 0);
}
}

0 comments on commit b9090ae

Please sign in to comment.