Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added extend/extend_unchecked for MutableBinaryArray #486

Merged
merged 3 commits into from
Oct 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 189 additions & 33 deletions src/array/binary/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,77 @@ impl<O: Offset> MutableBinaryArray<O> {
unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
}

/// Extends the [`MutableBinaryArray`] from an iterator of trusted length whose
/// items are plain (non-optional) values.
///
/// This differs from `extend_trusted_len`, which accepts an iterator of optional values.
/// The work is delegated to `extend_trusted_len_values_unchecked`.
#[inline]
pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
where
P: AsRef<[u8]>,
I: TrustedLen<Item = P>,
{
// SAFETY: the `I: TrustedLen` bound satisfies the safety requirement of
// `extend_trusted_len_values_unchecked` (the iterator must be `TrustedLen`).
unsafe { self.extend_trusted_len_values_unchecked(iterator) }
}

/// Appends every value yielded by `iterator` to this [`MutableBinaryArray`].
///
/// Unlike `extend_trusted_len_unchecked`, the items are plain values rather than
/// `Option`s, so every appended slot is valid. When a validity bitmap is already
/// present it is extended with `true` for each new slot; when it is absent, none
/// is created.
///
/// # Panics
/// Panics if the iterator's `size_hint` does not report an upper bound.
///
/// # Safety
/// The `iterator` must be [`TrustedLen`]
#[inline]
pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)
where
    P: AsRef<[u8]>,
    I: Iterator<Item = P>,
{
    // The upper bound has to be captured before the iterator is consumed below.
    let new_slots = iterator
        .size_hint()
        .1
        .expect("extend_trusted_len_values requires an upper limit");

    extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);

    // Only an existing bitmap needs to grow; all appended slots are valid.
    if let Some(bitmap) = &mut self.validity {
        bitmap.extend_constant(new_slots, true);
    }
}

/// Extends the [`MutableBinaryArray`] from a [`TrustedLen`] iterator of optional values.
///
/// Use `extend_trusted_len_values` instead when every item is a plain value.
/// The work is delegated to `extend_trusted_len_unchecked`.
#[inline]
pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
where
P: AsRef<[u8]>,
I: TrustedLen<Item = Option<P>>,
{
// SAFETY: the `I: TrustedLen` bound satisfies the safety requirement of
// `extend_trusted_len_unchecked` (the iterator must be `TrustedLen`).
unsafe { self.extend_trusted_len_unchecked(iterator) }
}

/// Appends every optional value yielded by `iterator` to this [`MutableBinaryArray`].
///
/// If the array has no validity bitmap yet, one is created first with all existing
/// slots marked valid. After the extension, the bitmap is dropped again when it
/// reports no nulls.
///
/// # Safety
/// The `iterator` must be [`TrustedLen`]
#[inline]
pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
where
    P: AsRef<[u8]>,
    I: Iterator<Item = Option<P>>,
{
    // The helper below needs a bitmap to push into, so materialize one that
    // marks every slot already in the array as valid.
    if self.validity.is_none() {
        let mut all_valid = MutableBitmap::new();
        all_valid.extend_constant(self.len(), true);
        self.validity = Some(all_valid);
    }

    // Guaranteed to be `Some` by the branch above.
    let bitmap = self.validity.as_mut().unwrap();

    extend_from_trusted_len_iter(&mut self.offsets, &mut self.values, bitmap, iterator);

    // Keep the bitmap only if it actually records a null.
    if bitmap.null_count() == 0 {
        self.validity = None;
    }
}

/// Creates a new [`MutableBinaryArray`] from a [`Iterator`] of `&[u8]`.
pub fn from_iter_values<T: AsRef<[u8]>, I: Iterator<Item = T>>(iterator: I) -> Self {
let (offsets, values) = values_iter(iterator);
Expand Down Expand Up @@ -329,36 +400,21 @@ where
let (_, upper) = iterator.size_hint();
let len = upper.expect("trusted_len_unzip requires an upper limit");

let mut null = MutableBitmap::with_capacity(len);
let mut offsets = MutableBuffer::<O>::with_capacity(len + 1);
let mut values = MutableBuffer::<u8>::new();
let mut validity = MutableBitmap::new();

let mut length = O::default();
let mut dst = offsets.as_mut_ptr();
std::ptr::write(dst, length);
dst = dst.add(1);
for item in iterator {
if let Some(item) = item {
null.push(true);
let s = item.as_ref();
length += O::from_usize(s.len()).unwrap();
values.extend_from_slice(s);
} else {
null.push(false);
values.extend_from_slice(b"");
};
offsets.push_unchecked(O::default());

std::ptr::write(dst, length);
dst = dst.add(1);
}
assert_eq!(
dst.offset_from(offsets.as_ptr()) as usize,
len + 1,
"Trusted iterator length was not accurately reported"
);
offsets.set_len(len + 1);
extend_from_trusted_len_iter(&mut offsets, &mut values, &mut validity, iterator);

(null.into(), offsets, values)
let validity = if validity.null_count() > 0 {
Some(validity)
} else {
None
};

(validity, offsets, values)
}

/// # Safety
Expand Down Expand Up @@ -426,26 +482,126 @@ where
let mut offsets = MutableBuffer::<O>::with_capacity(len + 1);
let mut values = MutableBuffer::<u8>::new();

let mut length = O::default();
offsets.push_unchecked(O::default());

extend_from_trusted_len_values_iter(&mut offsets, &mut values, iterator);

(offsets, values)
}

// Appends to the `offsets` and `values` [`MutableBuffer`]s the entries produced
// by the incoming `iterator` of (non-optional) values.
//
// For every item, its bytes are appended to `values` and the running total
// (starting from the last offset already stored) is written as a new offset.
//
// # Panics
// Panics if the iterator reports no upper bound in `size_hint`, if `offsets`
// holds no element yet (`last().unwrap()`), or if `O::from_usize` fails for an
// item's length.
//
// # Safety
// The caller must ensure the `iterator` is [`TrustedLen`]
#[inline]
unsafe fn extend_from_trusted_len_values_iter<I, P, O>(
    offsets: &mut MutableBuffer<O>,
    values: &mut MutableBuffer<u8>,
    iterator: I,
) where
    O: Offset,
    P: AsRef<[u8]>,
    I: Iterator<Item = P>,
{
    let (_, upper) = iterator.size_hint();
    let additional = upper.expect("extend_from_trusted_len_values_iter requires an upper limit");

    offsets.reserve(additional);

    // Read in the last offset, will be used to increment and store
    // new values later on
    let mut length = *offsets.last().unwrap();

    // Get a mutable pointer to the `offsets`, and move the pointer to the
    // first slot past the current length, where the next offset is written.
    // Existing offsets must not be touched: writing at index 0 here would both
    // clobber the first offset and shift every new offset by one slot.
    let mut dst = offsets.as_mut_ptr();
    dst = dst.add(offsets.len());

    for item in iterator {
        let s = item.as_ref();

        // Calculate the new offset value
        length += O::from_usize(s.len()).unwrap();

        // Push new entries for both `values` and `offsets` buffer
        values.extend_from_slice(s);
        std::ptr::write(dst, length);

        // Move to the next position in offset buffer
        dst = dst.add(1);
    }

    // `offsets.len()` is still the old length here, since the writes above
    // went through the raw pointer.
    debug_assert_eq!(
        dst.offset_from(offsets.as_ptr()) as usize,
        offsets.len() + additional,
        "TrustedLen iterator's length was not accurately reported"
    );

    // We make sure to set the new length for the `offsets` buffer
    offsets.set_len(offsets.len() + additional);
}

// Populates `offsets`, `values`, and `validity` [`MutableBuffer`]s with
// information extracted from the incoming `iterator`.
//
// # Safety
// The caller must ensure that `iterator` is [`TrustedLen`]
#[inline]
unsafe fn extend_from_trusted_len_iter<O, I, P>(
offsets: &mut MutableBuffer<O>,
values: &mut MutableBuffer<u8>,
validity: &mut MutableBitmap,
iterator: I,
) where
O: Offset,
P: AsRef<[u8]>,
I: Iterator<Item = Option<P>>,
{
let (_, upper) = iterator.size_hint();
let additional = upper.expect("extend_from_trusted_len_iter requires an upper limit");

offsets.reserve(additional);
validity.reserve(additional);

// Read in the last offset, will be used to increment and store
// new values later on
let mut length = *offsets.last().unwrap();

// Get a mutable pointer to the `offsets`, and move the pointer
// to the position, where a new value will be written
let mut dst = offsets.as_mut_ptr();
dst = dst.add(offsets.len());

for item in iterator {
if let Some(item) = item {
let bytes = item.as_ref();

// Calculate new offset value
length += O::from_usize(bytes.len()).unwrap();

// Push new values for `values` and `validity` buffer
values.extend_from_slice(bytes);
validity.push_unchecked(true);
} else {
// If `None`, update only `validity`
validity.push_unchecked(false);
}

// Push new offset or old offset depending on the `item`
std::ptr::write(dst, length);

// Move to the next position in offset buffer
dst = dst.add(1);
}
assert_eq!(

debug_assert_eq!(
dst.offset_from(offsets.as_ptr()) as usize,
len + 1,
"Trusted iterator length was not accurately reported"
offsets.len() + additional,
"TrustedLen iterator's length was not accurately reported"
);
offsets.set_len(len + 1);

(offsets, values)
// We make sure to set the new length for the `offsets` buffer
offsets.set_len(offsets.len() + additional);
}

/// Creates two [`MutableBuffer`]s from an iterator of `&[u8]`.
Expand Down
35 changes: 35 additions & 0 deletions tests/it/array/binary/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,38 @@ fn push_null() {
let array: BinaryArray<i32> = array.into();
assert_eq!(array.validity(), Some(&Bitmap::from([false])));
}

#[test]
fn extend_trusted_len_values() {
    // Two batches of plain values followed by one batch of optional values.
    let first_batch = vec![b"first".to_vec(), b"second".to_vec()];
    let second_batch = vec![b"third".to_vec()];
    let optional_batch = vec![None, Some(b"fourth".to_vec())];

    let mut mutable = MutableBinaryArray::<i32>::new();
    mutable.extend_trusted_len_values(first_batch.into_iter());
    mutable.extend_trusted_len_values(second_batch.into_iter());
    mutable.extend_trusted_len(optional_batch.into_iter());

    let frozen: BinaryArray<i32> = mutable.into();

    // The `None` slot contributes no bytes and repeats the previous offset (16).
    assert_eq!(frozen.values().as_slice(), b"firstsecondthirdfourth");
    assert_eq!(frozen.offsets().as_slice(), &[0, 5, 11, 16, 16, 22]);

    // Only slot 3 (the `None`) is expected to be unset.
    let expected_validity = Bitmap::from_u8_slice(&[0b00010111], 5);
    assert_eq!(frozen.validity(), Some(&expected_validity));
}

#[test]
fn extend_trusted_len() {
    // First batch has no nulls; the second one introduces a `None`.
    let all_present = vec![Some(b"first".to_vec()), Some(b"second".to_vec())];
    let with_null = vec![None, Some(b"third".to_vec())];

    let mut mutable = MutableBinaryArray::<i32>::new();
    mutable.extend_trusted_len(all_present.into_iter());
    mutable.extend_trusted_len(with_null.into_iter());

    let frozen: BinaryArray<i32> = mutable.into();

    // The `None` slot contributes no bytes and repeats the previous offset (11).
    assert_eq!(frozen.values().as_slice(), b"firstsecondthird");
    assert_eq!(frozen.offsets().as_slice(), &[0, 5, 11, 11, 16]);

    // Only slot 2 (the `None`) is expected to be unset.
    let expected_validity = Bitmap::from_u8_slice(&[0b00001011], 4);
    assert_eq!(frozen.validity(), Some(&expected_validity));
}