Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved iterators (#1270)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Oct 18, 2022
1 parent 6e46651 commit 27e109d
Show file tree
Hide file tree
Showing 33 changed files with 494 additions and 250 deletions.
4 changes: 2 additions & 2 deletions src/array/binary/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
array::{ArrayAccessor, ArrayValuesIter, Offset},
bitmap::utils::ZipValidity,
bitmap::utils::{BitmapIter, ZipValidity},
};

use super::BinaryArray;
Expand All @@ -24,7 +24,7 @@ pub type BinaryValueIter<'a, O> = ArrayValuesIter<'a, BinaryArray<O>>;

impl<'a, O: Offset> IntoIterator for &'a BinaryArray<O> {
type Item = Option<&'a [u8]>;
type IntoIter = ZipValidity<'a, &'a [u8], BinaryValueIter<'a, O>>;
type IntoIter = ZipValidity<&'a [u8], BinaryValueIter<'a, O>, BitmapIter<'a>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
Expand Down
6 changes: 3 additions & 3 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
bitmap::{
utils::{zip_validity, ZipValidity},
utils::{BitmapIter, ZipValidity},
Bitmap,
},
buffer::Buffer,
Expand Down Expand Up @@ -116,8 +116,8 @@ impl<O: Offset> BinaryArray<O> {
}

/// Returns an iterator of `Option<&[u8]>` over every element of this array.
pub fn iter(&self) -> ZipValidity<&[u8], BinaryValueIter<O>> {
zip_validity(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
pub fn iter(&self) -> ZipValidity<&[u8], BinaryValueIter<O>, BitmapIter> {
ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
}

/// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity
Expand Down
24 changes: 19 additions & 5 deletions src/array/boolean/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,35 @@
use crate::bitmap::utils::{zip_validity, BitmapIter, ZipValidity};
use crate::bitmap::utils::{BitmapIter, ZipValidity};
use crate::bitmap::IntoIter;

use super::super::MutableArray;
use super::{BooleanArray, MutableBooleanArray};

impl<'a> IntoIterator for &'a BooleanArray {
type Item = Option<bool>;
type IntoIter = ZipValidity<'a, bool, BitmapIter<'a>>;
type IntoIter = ZipValidity<bool, BitmapIter<'a>, BitmapIter<'a>>;

#[inline]
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

/// Consuming iteration over a [`BooleanArray`]: every slot is yielded as an
/// `Option<bool>`.
impl IntoIterator for BooleanArray {
    type Item = Option<bool>;
    type IntoIter = ZipValidity<bool, IntoIter, IntoIter>;

    /// Takes the array apart with `into_inner`, ignores the data type, and
    /// pairs the owned values iterator with the owned validity iterator
    /// (when a validity bitmap is present).
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        let (_, bits, mask) = self.into_inner();
        ZipValidity::new(bits.into_iter(), mask.map(|bitmap| bitmap.into_iter()))
    }
}

impl<'a> IntoIterator for &'a MutableBooleanArray {
type Item = Option<bool>;
type IntoIter = ZipValidity<'a, bool, BitmapIter<'a>>;
type IntoIter = ZipValidity<bool, BitmapIter<'a>, BitmapIter<'a>>;

#[inline]
fn into_iter(self) -> Self::IntoIter {
Expand All @@ -26,8 +40,8 @@ impl<'a> IntoIterator for &'a MutableBooleanArray {
impl<'a> MutableBooleanArray {
/// Returns an iterator over the optional values of this [`MutableBooleanArray`].
#[inline]
pub fn iter(&'a self) -> ZipValidity<'a, bool, BitmapIter<'a>> {
zip_validity(
pub fn iter(&'a self) -> ZipValidity<bool, BitmapIter<'a>, BitmapIter<'a>> {
ZipValidity::new(
self.values().iter(),
self.validity().as_ref().map(|x| x.iter()),
)
Expand Down
17 changes: 14 additions & 3 deletions src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
bitmap::{
utils::{zip_validity, BitmapIter, ZipValidity},
utils::{BitmapIter, ZipValidity},
Bitmap, MutableBitmap,
},
datatypes::{DataType, PhysicalType},
Expand Down Expand Up @@ -87,8 +87,8 @@ impl BooleanArray {

/// Returns an iterator over the optional values of this [`BooleanArray`].
#[inline]
pub fn iter(&self) -> ZipValidity<bool, BitmapIter> {
zip_validity(
pub fn iter(&self) -> ZipValidity<bool, BitmapIter, BitmapIter> {
ZipValidity::new(
self.values().iter(),
self.validity.as_ref().map(|x| x.iter()),
)
Expand Down Expand Up @@ -361,6 +361,17 @@ impl BooleanArray {
std::sync::Arc::new(self)
}

/// Returns its internal representation
#[must_use]
pub fn into_inner(self) -> (DataType, Bitmap, Option<Bitmap>) {
let Self {
data_type,
values,
validity,
} = self;
(data_type, values, validity)
}

/// The canonical method to create a [`BooleanArray`]
/// # Panics
/// This function errors iff:
Expand Down
4 changes: 2 additions & 2 deletions src/array/dictionary/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::bitmap::utils::ZipValidity;
use crate::bitmap::utils::{BitmapIter, ZipValidity};
use crate::scalar::Scalar;
use crate::trusted_len::TrustedLen;

Expand Down Expand Up @@ -56,7 +56,7 @@ impl<'a, K: DictionaryKey> DoubleEndedIterator for DictionaryValuesIter<'a, K> {
}

type ValuesIter<'a, K> = DictionaryValuesIter<'a, K>;
type ZipIter<'a, K> = ZipValidity<'a, Box<dyn Scalar>, ValuesIter<'a, K>>;
type ZipIter<'a, K> = ZipValidity<Box<dyn Scalar>, ValuesIter<'a, K>, BitmapIter<'a>>;

impl<'a, K: DictionaryKey> IntoIterator for &'a DictionaryArray<K> {
type Item = Option<Box<dyn Scalar>>;
Expand Down
6 changes: 3 additions & 3 deletions src/array/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::hint::unreachable_unchecked;

use crate::{
bitmap::{
utils::{zip_validity, ZipValidity},
utils::{BitmapIter, ZipValidity},
Bitmap,
},
datatypes::{DataType, IntegerType},
Expand Down Expand Up @@ -190,8 +190,8 @@ impl<K: DictionaryKey> DictionaryArray<K> {
/// # Implementation
/// This function will allocate a new [`Scalar`] per item and is usually not performant.
/// Consider calling `keys_iter` and `values`, downcasting `values`, and iterating over that.
pub fn iter(&self) -> ZipValidity<Box<dyn Scalar>, DictionaryValuesIter<K>> {
zip_validity(
pub fn iter(&self) -> ZipValidity<Box<dyn Scalar>, DictionaryValuesIter<K>, BitmapIter> {
ZipValidity::new(
DictionaryValuesIter::new(self),
self.keys.validity().as_ref().map(|x| x.iter()),
)
Expand Down
71 changes: 15 additions & 56 deletions src/array/fixed_size_binary/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,53 +1,13 @@
use crate::bitmap::utils::{zip_validity, ZipValidity};
use crate::{
array::MutableArray,
bitmap::utils::{BitmapIter, ZipValidity},
};

use super::super::MutableArray;
use super::{FixedSizeBinaryArray, FixedSizeBinaryValues, MutableFixedSizeBinaryArray};

/// Iterator yielding consecutive byte slices of a [`FixedSizeBinaryValues`]
/// implementor, each `size()` bytes wide.
/// # Safety
/// This iterator is `TrustedLen`
pub struct FixedSizeBinaryValuesIter<'a, T: FixedSizeBinaryValues> {
// Provider of the underlying bytes (`values()`) and the per-item width (`size()`).
array: &'a T,
// Total number of items; `new` computes it as `values().len() / size()`.
len: usize,
// Position of the next item to yield; iteration ends once it reaches `len`.
index: usize,
}

impl<'a, T: FixedSizeBinaryValues> FixedSizeBinaryValuesIter<'a, T> {
    /// Builds an iterator positioned at the first item of `array`.
    ///
    /// The item count is the byte length of `array.values()` divided by
    /// `array.size()`, using integer division.
    #[inline]
    pub fn new(array: &'a T) -> Self {
        let len = array.values().len() / array.size();
        Self {
            array,
            len,
            index: 0,
        }
    }
}

impl<'a, T: FixedSizeBinaryValues> Iterator for FixedSizeBinaryValuesIter<'a, T> {
    type Item = &'a [u8];

    /// Yields the next `size()`-byte slice, or `None` once `index` has
    /// reached `len`.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let position = self.index;
        if position < self.len {
            let width = self.array.size();
            // SAFETY: `position < len`, and `new` sets `len` to
            // `values().len() / size()`, so `(position + 1) * width` does not
            // exceed `values().len()` — assuming `values()` and `size()` return
            // the same results as at construction time.
            let item = unsafe {
                self.array
                    .values()
                    .get_unchecked(position * width..(position + 1) * width)
            };
            self.index = position + 1;
            Some(item)
        } else {
            None
        }
    }

    /// Exact number of items left: `len - index` as both lower and upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.len - self.index;
        (remaining, Some(remaining))
    }
}
use super::{FixedSizeBinaryArray, MutableFixedSizeBinaryArray};

impl<'a> IntoIterator for &'a FixedSizeBinaryArray {
type Item = Option<&'a [u8]>;
type IntoIter = ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, FixedSizeBinaryArray>>;
type IntoIter = ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
Expand All @@ -58,20 +18,19 @@ impl<'a> FixedSizeBinaryArray {
/// constructs a new iterator
pub fn iter(
&'a self,
) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, FixedSizeBinaryArray>> {
zip_validity(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> {
ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
}

/// Returns iterator over the values of [`FixedSizeBinaryArray`]
pub fn values_iter(&'a self) -> FixedSizeBinaryValuesIter<'a, FixedSizeBinaryArray> {
FixedSizeBinaryValuesIter::new(self)
pub fn values_iter(&'a self) -> std::slice::ChunksExact<'a, u8> {
self.values().chunks_exact(self.size)
}
}

impl<'a> IntoIterator for &'a MutableFixedSizeBinaryArray {
type Item = Option<&'a [u8]>;
type IntoIter =
ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, MutableFixedSizeBinaryArray>>;
type IntoIter = ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
Expand All @@ -82,15 +41,15 @@ impl<'a> MutableFixedSizeBinaryArray {
/// constructs a new iterator
pub fn iter(
&'a self,
) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, MutableFixedSizeBinaryArray>> {
zip_validity(
) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> {
ZipValidity::new(
self.iter_values(),
self.validity().as_ref().map(|x| x.iter()),
)
}

/// Returns iterator over the values of [`MutableFixedSizeBinaryArray`]
pub fn iter_values(&'a self) -> FixedSizeBinaryValuesIter<'a, MutableFixedSizeBinaryArray> {
FixedSizeBinaryValuesIter::new(self)
pub fn iter_values(&'a self) -> std::slice::ChunksExact<'a, u8> {
self.values().chunks_exact(self.size())
}
}
6 changes: 6 additions & 0 deletions src/array/fixed_size_binary/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ impl MutableFixedSizeBinaryArray {
self.try_push(value).unwrap()
}

/// Number of fixed-size elements held by this array: the byte length of
/// `values` divided by the per-element `size`.
#[inline]
pub fn len(&self) -> usize {
    let width = self.size as usize;
    self.values.len() / width
}

/// Pop the last entry from [`MutableFixedSizeBinaryArray`].
/// This function returns `None` iff this array is empty
pub fn pop(&mut self) -> Option<Vec<u8>> {
Expand Down
32 changes: 21 additions & 11 deletions src/array/fixed_size_list/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,28 @@
use crate::{
array::{list::ListValuesIter, Array, IterableListArray},
bitmap::utils::{zip_validity, ZipValidity},
array::{Array, ArrayAccessor, ArrayValuesIter},
bitmap::utils::{BitmapIter, ZipValidity},
};

use super::FixedSizeListArray;

impl IterableListArray for FixedSizeListArray {
unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
FixedSizeListArray::value_unchecked(self, i)
/// Exposes a [`FixedSizeListArray`] through [`ArrayAccessor`]; each accessed
/// item is a `Box<dyn Array>`.
// NOTE(review): as an `unsafe impl`, this presumably relies on the inherent
// `value_unchecked` being sound for every `index < len()` — confirm.
unsafe impl<'a> ArrayAccessor<'a> for FixedSizeListArray {
type Item = Box<dyn Array>;

// NOTE(review): the call below presumably resolves to the inherent
// `FixedSizeListArray::value_unchecked` (inherent methods take precedence
// over trait methods), so it is not a recursive call — confirm.
#[inline]
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

// NOTE(review): same pattern — presumably forwards to the inherent `len` — confirm.
#[inline]
fn len(&self) -> usize {
self.len()
}
}

type ValuesIter<'a> = ListValuesIter<'a, FixedSizeListArray>;
type ZipIter<'a> = ZipValidity<'a, Box<dyn Array>, ValuesIter<'a>>;
/// Iterator of values of a [`FixedSizeListArray`].
pub type FixedSizeListValuesIter<'a> = ArrayValuesIter<'a, FixedSizeListArray>;

type ZipIter<'a> = ZipValidity<Box<dyn Array>, FixedSizeListValuesIter<'a>, BitmapIter<'a>>;

impl<'a> IntoIterator for &'a FixedSizeListArray {
type Item = Option<Box<dyn Array>>;
Expand All @@ -26,14 +36,14 @@ impl<'a> IntoIterator for &'a FixedSizeListArray {
impl<'a> FixedSizeListArray {
/// Returns an iterator of `Option<Box<dyn Array>>`
pub fn iter(&'a self) -> ZipIter<'a> {
zip_validity(
ListValuesIter::new(self),
ZipValidity::new(
FixedSizeListValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
)
}

/// Returns an iterator of `Box<dyn Array>`
pub fn values_iter(&'a self) -> ValuesIter<'a> {
ListValuesIter::new(self)
pub fn values_iter(&'a self) -> FixedSizeListValuesIter<'a> {
FixedSizeListValuesIter::new(self)
}
}
5 changes: 2 additions & 3 deletions src/array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,17 @@ mod private {
impl<'a, T: super::ArrayAccessor<'a>> Sealed for T {}
}

/// Sealed trait representing access to a value of an array.
/// # Safety
/// Implementers of this trait guarantee that
/// `value_unchecked` is safe when called up to `len`
pub unsafe trait ArrayAccessor<'a>: private::Sealed {
/// Type of the value produced for a single index.
type Item: 'a;
/// Returns the value at `index`.
/// # Safety
/// Presumably `index` must be below `len()`, per the trait-level contract — confirm.
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item;
/// Number of values; the bound referred to by the trait-level safety contract.
fn len(&self) -> usize;
}

/// Iterator of values of an `ArrayAccessor`.
/// Iterator of values of an [`ArrayAccessor`].
#[derive(Debug, Clone)]
pub struct ArrayValuesIter<'a, A: ArrayAccessor<'a>> {
array: &'a A,
Expand Down
Loading

0 comments on commit 27e109d

Please sign in to comment.