Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added List DType & slice baseline #1735

Draft
wants to merge 2 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ cargo-fuzz = true

[dependencies]
libfuzzer-sys = { workspace = true }
arrow-buffer = { workspace = true }
vortex-array = { workspace = true, features = ["arbitrary"] }
vortex-buffer = { workspace = true }
vortex-dtype = { workspace = true, features = ["arbitrary"] }
Expand Down
59 changes: 51 additions & 8 deletions fuzz/src/slice.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use arrow_buffer::ArrowNativeType;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::array::{BoolArray, PrimitiveArray, StructArray, VarBinViewArray};
use vortex_array::array::{BoolArray, ListArray, PrimitiveArray, StructArray, VarBinViewArray};
use vortex_array::validity::{ArrayValidity, Validity};
use vortex_array::variants::StructArrayTrait;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_dtype::{match_each_native_ptype, DType};
use vortex_array::variants::{PrimitiveArrayTrait, StructArrayTrait};
use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant};
use vortex_dtype::{match_each_native_ptype, DType, NativePType};
use vortex_error::VortexExpect;

pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> ArrayData {
Expand All @@ -28,11 +29,12 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar
.vortex_expect("Validity length cannot mismatch")
.into_array()
}
DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| {
DType::Primitive(p, _) => {
let primitive_array = array.clone().into_primitive().unwrap();
let vec_values = primitive_array.into_maybe_null_slice::<$P>();
PrimitiveArray::from_vec(vec_values[start..stop].into(), validity).into_array()
}),
match_each_native_ptype!(p, |$P| {
slice_primitive::<$P>(primitive_array, validity, start, stop)
})
}
DType::Utf8(_) | DType::Binary(_) => {
let utf8 = array.clone().into_varbinview().unwrap();
let values = utf8
Expand All @@ -56,6 +58,47 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar
.unwrap()
.into_array()
}
DType::List(..) => {
let list_array = array.clone().into_list().unwrap();
let offsets = slice_canonical_array(&list_array.offsets(), start, stop)
.into_primitive()
.unwrap();

let elements = slice_canonical_array(
&list_array.elements(),
offsets.get_as_cast::<u64>(0) as usize,
offsets.get_as_cast::<u64>(offsets.len()) as usize,
);
let offsets = match_each_native_ptype!(offsets.ptype(), |$P| {
shift_offsets::<$P>(offsets)
})
.into_array();
ListArray::try_new(elements, offsets, validity)
.unwrap()
.into_array()
}
_ => unreachable!("Not a canonical array"),
}
}

/// Rebases an offsets array so that its first entry becomes zero.
///
/// The first offset is subtracted from every entry (including itself). An
/// empty input is handed back untouched. The rebuilt array is always created
/// with `Validity::NonNullable`.
///
/// `O` is the native type used to read the offsets buffer.
fn shift_offsets<O: NativePType + ArrowNativeType>(offsets: PrimitiveArray) -> PrimitiveArray {
    // Nothing to rebase; also guards the `raw[0]` access below.
    if offsets.is_empty() {
        return offsets;
    }

    let raw = offsets.into_maybe_null_slice::<O>();
    let base = raw[0];
    let rebased = raw.iter().map(|&value| value - base).collect::<Vec<_>>();

    PrimitiveArray::from_vec(rebased, Validity::NonNullable)
}

/// Builds a new primitive array from the values in `start..stop`.
///
/// The values of `primitive_array` are read as `T`, the half-open range
/// `start..stop` is copied out, and the copy is paired with the supplied
/// `validity` (which the caller provides; it is not derived here).
///
/// # Panics
///
/// Panics if `start..stop` is not a valid range into the value buffer.
fn slice_primitive<T: NativePType + ArrowNativeType>(
    primitive_array: PrimitiveArray,
    validity: Validity,
    start: usize,
    stop: usize,
) -> ArrayData {
    let all_values = primitive_array.into_maybe_null_slice::<T>();
    let window = all_values[start..stop].to_vec();
    let sliced = PrimitiveArray::from_vec(window, validity);
    sliced.into_array()
}
31 changes: 21 additions & 10 deletions fuzz/src/take.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use arrow_buffer::ArrowNativeType;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::array::{BoolArray, PrimitiveArray, StructArray, VarBinViewArray};
use vortex_array::validity::{ArrayValidity, Validity};
use vortex_array::variants::StructArrayTrait;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_dtype::{match_each_native_ptype, DType};
use vortex_dtype::{match_each_native_ptype, DType, NativePType};
use vortex_error::VortexExpect;

pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData {
Expand All @@ -30,16 +31,12 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData {
.vortex_expect("Validity length cannot mismatch")
.into_array()
}
DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| {
DType::Primitive(p, _) => {
let primitive_array = array.clone().into_primitive().unwrap();
let vec_values = primitive_array
.maybe_null_slice::<$P>()
.iter()
.copied()
.collect::<Vec<_>>();
PrimitiveArray::from_vec(indices.iter().map(|i| vec_values[*i]).collect(),validity)
.into_array()
}),
match_each_native_ptype!(p, |$P| {
take_primitive::<$P>(primitive_array, validity, indices)
})
}
DType::Utf8(_) | DType::Binary(_) => {
let utf8 = array.clone().into_varbinview().unwrap();
let values = utf8
Expand Down Expand Up @@ -70,3 +67,17 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData {
_ => unreachable!("Not a canonical array"),
}
}

/// Gathers the values of `primitive_array` at the given `indices`.
///
/// The values are read as `T` and, for each entry of `indices` in order, the
/// value at that position is copied into the output. The output is paired with
/// the supplied `validity` (which the caller provides; it is not derived here).
///
/// # Panics
///
/// Panics if any index is out of bounds for the value buffer.
fn take_primitive<T: NativePType + ArrowNativeType>(
    primitive_array: PrimitiveArray,
    validity: Validity,
    indices: &[usize],
) -> ArrayData {
    // Index the borrowed slice directly; copying the whole buffer into a
    // temporary `Vec` first is unnecessary work.
    let values = primitive_array.maybe_null_slice::<T>();
    let taken: Vec<T> = indices.iter().map(|&idx| values[idx]).collect();
    PrimitiveArray::from_vec(taken, validity).into_array()
}
2 changes: 1 addition & 1 deletion vortex-dtype/src/arbitrary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ fn random_dtype(u: &mut Unstructured<'_>, depth: u8) -> Result<DType> {
2 => DType::Utf8(u.arbitrary()?),
3 => DType::Binary(u.arbitrary()?),
4 => DType::Struct(random_struct_dtype(u, depth - 1)?, u.arbitrary()?),
5 => DType::List(Arc::new(random_dtype(u, depth - 1)?), u.arbitrary()?),
// Null,
// List(Arc<DType>, Nullability),
// Extension(ExtDType, Nullability),
_ => unreachable!("Number out of range"),
})
Expand Down
Loading