Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved performance of bitmap from_trusted.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Nov 5, 2021
1 parent 4fd3aa1 commit 1c8b55d
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 11 deletions.
79 changes: 68 additions & 11 deletions src/bitmap/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::hint::unreachable_unchecked;
use std::iter::FromIterator;

use crate::bitmap::utils::merge_reversed;
Expand Down Expand Up @@ -309,6 +310,66 @@ impl FromIterator<bool> for MutableBitmap {
}
}

/// Extends the [`MutableBuffer`] from `iterator`, packing the booleans into bytes
/// (the first item of every group of 8 lands in the least-significant bit).
///
/// Returns the number of bits consumed from `iterator`, i.e. the upper bound
/// reported by its `size_hint`.
///
/// # Panics
/// Panics if `buffer.len() % 8 != 0` or if the iterator reports no upper bound.
/// # Safety
/// The iterator MUST be [`TrustedLen`]: it must yield exactly as many items as
/// the upper bound of its `size_hint`. Yielding fewer is undefined behavior.
#[inline]
unsafe fn from_trusted_iter_unchecked(
    buffer: &mut MutableBuffer<u8>,
    mut iterator: impl Iterator<Item = bool>,
) -> usize {
    // todo: lift this restriction => handling offsets and adding support to `extend`.
    assert_eq!(buffer.len() % 8, 0);
    let additional_bits = iterator.size_hint().1.unwrap();
    let chunks = additional_bits / 8;
    let remainder = additional_bits % 8;

    // One byte per full chunk, plus one for the trailing partial byte (if any).
    let additional = chunks + (remainder > 0) as usize;
    buffer.reserve(additional);

    for _ in 0..chunks {
        // The accumulator and the mask MUST start fresh for every output byte:
        // after 8 shifts `mask` is 0, so re-using them would emit the first
        // byte over and over.
        let mut byte_accum: u8 = 0;
        let mut mask: u8 = 1;

        for _ in 0..8 {
            let value = match iterator.next() {
                Some(value) => value,
                // SAFETY: the caller guarantees the iterator yields `additional_bits` items.
                None => unsafe { unreachable_unchecked() },
            };

            if value {
                byte_accum |= mask;
            }
            mask <<= 1;
        }
        // SAFETY: capacity was reserved above
        unsafe { buffer.push_unchecked(byte_accum) };
    }

    if remainder > 0 {
        let mut byte_accum: u8 = 0;
        let mut mask: u8 = 1;
        for _ in 0..remainder {
            let value = match iterator.next() {
                Some(value) => value,
                // SAFETY: the caller guarantees the iterator yields `additional_bits` items.
                None => unsafe { unreachable_unchecked() },
            };

            if value {
                byte_accum |= mask;
            }
            mask <<= 1;
        }
        // SAFETY: capacity was reserved above
        unsafe { buffer.push_unchecked(byte_accum) };
    }
    additional_bits
}

#[inline]
fn extend<I: Iterator<Item = bool>>(buffer: &mut [u8], length: usize, mut iterator: I) {
let chunks = length / 8;
Expand Down Expand Up @@ -339,6 +400,7 @@ impl MutableBitmap {
/// Extends `self` from an iterator of trusted len.
/// # Safety
/// The caller must guarantee that the iterator has a trusted len.
#[inline]
pub unsafe fn extend_from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(
&mut self,
mut iterator: I,
Expand Down Expand Up @@ -388,31 +450,26 @@ impl MutableBitmap {
/// Creates a new [`MutableBitmap`] from an iterator of booleans.
///
/// The bits are packed by `from_trusted_iter_unchecked`, which also returns
/// the bitmap's length in bits.
/// # Safety
/// The iterator must report an accurate length.
#[inline]
pub unsafe fn from_trusted_len_iter_unchecked<I>(iterator: I) -> Self
where
    I: Iterator<Item = bool>,
{
    let mut buffer = MutableBuffer::<u8>::new();

    // SAFETY: the caller guarantees that the iterator reports an accurate length.
    let length = unsafe { from_trusted_iter_unchecked(&mut buffer, iterator) };

    Self { buffer, length }
}

/// Creates a new [`MutableBitmap`] from an iterator of booleans.
///
/// Safe counterpart of `from_trusted_len_iter_unchecked`: the `TrustedLen`
/// bound is what upholds that function's safety contract.
#[inline]
pub fn from_trusted_len_iter<I>(iterator: I) -> Self
where
    I: TrustedLen<Item = bool>,
{
    // Safety: Iterator is `TrustedLen`
    unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
}

/// Creates a new [`MutableBitmap`] from an iterator of booleans.
Expand Down
7 changes: 7 additions & 0 deletions tests/it/bitmap/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@ use arrow2::{
buffer::MutableBuffer,
};

/// A bitmap built from a boolean slice must iterate back to the same values.
#[test]
fn from_slice() {
    let expected = &[true, false, true];
    let bitmap = MutableBitmap::from(expected);
    let actual: Vec<_> = bitmap.iter().collect();
    assert_eq!(actual, expected);
}

#[test]
fn trusted_len() {
let data = vec![true; 65];
Expand Down

0 comments on commit 1c8b55d

Please sign in to comment.