Skip to content

Commit

Permalink
Convert FixedSizeListArray to GenericListArray
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed May 24, 2023
1 parent df691d5 commit f469262
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 5 deletions.
48 changes: 43 additions & 5 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::array::{get_offsets, make_array, print_long_array};
use crate::builder::{GenericListBuilder, PrimitiveBuilder};
use crate::{
iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
ArrowPrimitiveType,
ArrowPrimitiveType, FixedSizeListArray,
};
use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
Expand Down Expand Up @@ -310,9 +310,7 @@ impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSiz
}
}

impl<OffsetSize: 'static + OffsetSizeTrait> From<GenericListArray<OffsetSize>>
for ArrayData
{
impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
fn from(array: GenericListArray<OffsetSize>) -> Self {
let len = array.len();
let builder = ArrayDataBuilder::new(array.data_type)
Expand All @@ -325,6 +323,27 @@ impl<OffsetSize: 'static + OffsetSizeTrait> From<GenericListArray<OffsetSize>>
}
}

impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray>
    for GenericListArray<OffsetSize>
{
    /// Converts a [`FixedSizeListArray`] into a variable-length list array
    ///
    /// Every list in the result has the fixed size declared by the source
    /// data type, so the offsets are generated by repeating that size once per
    /// element of `value`. The child values and null buffer are taken from
    /// `value` via `clone`.
    fn from(value: FixedSizeListArray) -> Self {
        // A `FixedSizeListArray` is expected to always carry a
        // `DataType::FixedSizeList`; any other data type is treated as unreachable
        let (child_field, list_len) =
            if let DataType::FixedSizeList(f, n) = value.data_type() {
                (f.clone(), *n as usize)
            } else {
                unreachable!()
            };

        // One length entry per list, all of them equal to the fixed size
        let lengths = (0..value.len()).map(|_| list_len);
        let value_offsets = OffsetBuffer::from_lengths(lengths);

        Self {
            data_type: Self::DATA_TYPE_CONSTRUCTOR(child_field),
            nulls: value.nulls().cloned(),
            values: value.values().clone(),
            value_offsets,
        }
    }
}

impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
if data.buffers().len() != 1 {
Expand Down Expand Up @@ -509,7 +528,8 @@ pub type LargeListArray = GenericListArray<i64>;
#[cfg(test)]
mod tests {
use super::*;
use crate::builder::{Int32Builder, ListBuilder};
use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder};
use crate::cast::AsArray;
use crate::types::Int32Type;
use crate::{Int32Array, Int64Array};
use arrow_buffer::{bit_util, Buffer, ScalarBuffer};
Expand Down Expand Up @@ -1178,4 +1198,22 @@ mod tests {
"Invalid argument error: Max offset of 5 exceeds length of values 2"
);
}

#[test]
fn test_from_fixed_size_list() {
    // Three fixed-size lists of width 3; the middle one is appended as null
    let rows = [([1, 2, 3], true), ([0, 0, 0], false), ([4, 5, 6], true)];

    let mut fixed = FixedSizeListBuilder::new(Int32Builder::new(), 3);
    for (items, is_valid) in rows {
        fixed.values().append_slice(&items);
        fixed.append(is_valid);
    }
    let converted: ListArray = fixed.finish().into();

    // Flatten each list element into a plain `Vec<i32>`, keeping nulls as `None`
    let mut actual = Vec::new();
    for entry in converted.iter() {
        actual.push(
            entry.map(|child| child.as_primitive::<Int32Type>().values().to_vec()),
        );
    }

    let expected = vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])];
    assert_eq!(actual, expected);
}
}
52 changes: 52 additions & 0 deletions arrow-buffer/src/buffer/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,35 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
Self(buffer.into_buffer().into())
}

/// Builds a new [`OffsetBuffer`] by accumulating an iterator of slice lengths
///
/// The first offset is always zero; each subsequent offset is the running
/// total of all lengths yielded so far, so `n` lengths produce `n + 1` offsets.
///
/// ```
/// # use arrow_buffer::OffsetBuffer;
/// let offsets = OffsetBuffer::<i32>::from_lengths([1, 3, 5]);
/// assert_eq!(offsets.as_ref(), &[0, 1, 4, 9]);
/// ```
///
/// # Panics
///
/// Panics with "usize overflow" if the running total overflows `usize`, and
/// with "offset overflow" if the final total cannot be converted to `O`
pub fn from_lengths<I>(lengths: I) -> Self
where
    I: IntoIterator<Item = usize>,
{
    let lengths = lengths.into_iter();
    // Reserve room for the leading zero plus one offset per expected length
    let (lower_bound, _) = lengths.size_hint();
    let mut offsets = Vec::with_capacity(lower_bound + 1);
    offsets.push(O::usize_as(0));

    let total = lengths.fold(0_usize, |running, length| {
        let running = running.checked_add(length).expect("usize overflow");
        offsets.push(O::usize_as(running));
        running
    });

    // The running total never decreases, so checking only the final value is
    // enough to detect an offset that does not fit in `O`
    O::from_usize(total).expect("offset overflow");
    Self(offsets.into())
}

/// Returns the inner [`ScalarBuffer`]
pub fn inner(&self) -> &ScalarBuffer<O> {
&self.0
Expand Down Expand Up @@ -139,4 +168,27 @@ mod tests {
fn non_monotonic_offsets() {
OffsetBuffer::new(vec![1, 2, 0].into());
}

#[test]
fn from_lengths() {
    // Each offset is the running sum of the lengths that precede it
    let lengths = [2, 6, 3, 7, 2];
    let offsets = OffsetBuffer::<i32>::from_lengths(lengths);
    assert_eq!(offsets.as_ref(), &[0, 2, 8, 11, 18, 20]);

    // Two halves of `i32::MAX` add up to a value that still fits in `i32`,
    // so this must succeed
    let half = i32::MAX / 2;
    let lengths = [half as usize; 2];
    let offsets = OffsetBuffer::<i32>::from_lengths(lengths);
    assert_eq!(offsets.as_ref(), &[0, half, half * 2]);
}

#[test]
#[should_panic(expected = "offset overflow")]
fn from_lengths_offset_overflow() {
    // The total is `i32::MAX + 1`: representable as `usize` but not as `i32`
    let lengths = [i32::MAX as usize, 1];
    OffsetBuffer::<i32>::from_lengths(lengths);
}

#[test]
#[should_panic(expected = "usize overflow")]
fn from_lengths_usize_overflow() {
    // Adding 1 to `usize::MAX` overflows the running total itself
    let lengths = [usize::MAX, 1];
    OffsetBuffer::<i32>::from_lengths(lengths);
}
}

0 comments on commit f469262

Please sign in to comment.