Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert FixedSizeListArray to GenericListArray #4273

Merged
merged 1 commit into from
May 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 43 additions & 5 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::array::{get_offsets, make_array, print_long_array};
use crate::builder::{GenericListBuilder, PrimitiveBuilder};
use crate::{
iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
ArrowPrimitiveType,
ArrowPrimitiveType, FixedSizeListArray,
};
use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
Expand Down Expand Up @@ -310,9 +310,7 @@ impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSiz
}
}

impl<OffsetSize: 'static + OffsetSizeTrait> From<GenericListArray<OffsetSize>>
for ArrayData
{
impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
fn from(array: GenericListArray<OffsetSize>) -> Self {
let len = array.len();
let builder = ArrayDataBuilder::new(array.data_type)
Expand All @@ -325,6 +323,27 @@ impl<OffsetSize: 'static + OffsetSizeTrait> From<GenericListArray<OffsetSize>>
}
}

/// Converts a [`FixedSizeListArray`] into a variable-length list array.
///
/// Every list slot is given the same length (the fixed size recorded in the
/// source data type), while the child values and the null buffer are cloned
/// from the source array.
///
/// # Panics
///
/// Panics if the offsets cannot be built by [`OffsetBuffer::from_lengths`],
/// i.e. the total number of child values overflows.
impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray>
    for GenericListArray<OffsetSize>
{
    fn from(value: FixedSizeListArray) -> Self {
        // A FixedSizeListArray always reports a FixedSizeList data type, so
        // any other variant cannot occur here.
        let (child_field, list_len) = match value.data_type() {
            DataType::FixedSizeList(f, n) => (f.clone(), *n as usize),
            _ => unreachable!(),
        };

        // One length per list slot, all equal to the fixed size.
        let value_offsets =
            OffsetBuffer::from_lengths((0..value.len()).map(|_| list_len));

        Self {
            data_type: Self::DATA_TYPE_CONSTRUCTOR(child_field),
            nulls: value.nulls().cloned(),
            values: value.values().clone(),
            value_offsets,
        }
    }
}

impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
if data.buffers().len() != 1 {
Expand Down Expand Up @@ -509,7 +528,8 @@ pub type LargeListArray = GenericListArray<i64>;
#[cfg(test)]
mod tests {
use super::*;
use crate::builder::{Int32Builder, ListBuilder};
use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder};
use crate::cast::AsArray;
use crate::types::Int32Type;
use crate::{Int32Array, Int64Array};
use arrow_buffer::{bit_util, Buffer, ScalarBuffer};
Expand Down Expand Up @@ -1178,4 +1198,22 @@ mod tests {
"Invalid argument error: Max offset of 5 exceeds length of values 2"
);
}

#[test]
fn test_from_fixed_size_list() {
    // Three fixed-size (3) lists; the middle one is marked null even though
    // placeholder child values are still appended for it.
    let mut fixed = FixedSizeListBuilder::new(Int32Builder::new(), 3);
    for (items, is_valid) in [([1, 2, 3], true), ([0, 0, 0], false), ([4, 5, 6], true)] {
        fixed.values().append_slice(&items);
        fixed.append(is_valid);
    }
    let list: ListArray = fixed.finish().into();

    // Materialize each list slot as an optional vector of its child values.
    let actual: Vec<_> = list
        .iter()
        .map(|slot| slot.map(|child| child.as_primitive::<Int32Type>().values().to_vec()))
        .collect();
    let expected = vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])];
    assert_eq!(actual, expected)
}
}
52 changes: 52 additions & 0 deletions arrow-buffer/src/buffer/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,35 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
Self(buffer.into_buffer().into())
}

/// Build an [`OffsetBuffer`] from an iterator of slice lengths
///
/// The resulting offsets start at zero and each subsequent offset is the
/// running total of the lengths seen so far, so `n` lengths yield `n + 1`
/// offsets.
///
/// ```
/// # use arrow_buffer::OffsetBuffer;
/// let offsets = OffsetBuffer::<i32>::from_lengths([1, 3, 5]);
/// assert_eq!(offsets.as_ref(), &[0, 1, 4, 9]);
/// ```
///
/// # Panics
///
/// Panics if the sum of the lengths overflows `usize`, or if the final
/// total cannot be represented by the offset type `O`
pub fn from_lengths<I>(lengths: I) -> Self
where
    I: IntoIterator<Item = usize>,
{
    let lengths = lengths.into_iter();
    // One extra slot for the leading zero offset.
    let mut offsets = Vec::with_capacity(lengths.size_hint().0 + 1);
    offsets.push(O::usize_as(0));

    let mut total = 0_usize;
    offsets.extend(lengths.map(|len| {
        total = total.checked_add(len).expect("usize overflow");
        O::usize_as(total)
    }));

    // Only the final total is range-checked: the running total never
    // decreases, so if it fits in `O` every earlier offset does too.
    O::from_usize(total).expect("offset overflow");
    Self(offsets.into())
}

/// Returns the inner [`ScalarBuffer`]
pub fn inner(&self) -> &ScalarBuffer<O> {
&self.0
Expand Down Expand Up @@ -139,4 +168,27 @@ mod tests {
fn non_monotonic_offsets() {
OffsetBuffer::new(vec![1, 2, 0].into());
}

#[test]
fn from_lengths() {
    // Offsets are the running totals of the lengths, starting at zero.
    let offsets = OffsetBuffer::<i32>::from_lengths([2, 6, 3, 7, 2]);
    assert_eq!(offsets.as_ref(), &[0, 2, 8, 11, 18, 20]);

    // Two lengths whose sum stays just within the i32 range must not panic.
    let half_max = i32::MAX / 2;
    let len = half_max as usize;
    let offsets = OffsetBuffer::<i32>::from_lengths([len, len]);
    assert_eq!(offsets.as_ref(), &[0, half_max, half_max * 2]);
}

#[test]
#[should_panic(expected = "offset overflow")]
fn from_lengths_offset_overflow() {
    // The total (i32::MAX + 1) fits in usize but not in the i32 offset type.
    let lengths = [i32::MAX as usize, 1];
    OffsetBuffer::<i32>::from_lengths(lengths);
}

#[test]
#[should_panic(expected = "usize overflow")]
fn from_lengths_usize_overflow() {
    // Adding 1 to usize::MAX overflows the usize running total itself.
    let lengths = [usize::MAX, 1];
    OffsetBuffer::<i32>::from_lengths(lengths);
}
}