From 8e754ad63f013f86265003f1b2c02b69ee62c43b Mon Sep 17 00:00:00 2001 From: Jay Han <11144133+doki23@users.noreply.github.com> Date: Fri, 6 Dec 2024 03:22:30 +0800 Subject: [PATCH] implement binary_boolean for chunked encoding (#1532) Close #1443 --- .../src/array/chunked/compute/boolean.rs | 56 ++++++++++++++----- vortex-array/src/array/chunked/compute/mod.rs | 1 + vortex-array/src/compute/boolean.rs | 6 +- vortex-array/src/compute/mod.rs | 4 +- vortex-array/src/macros.rs | 5 +- 5 files changed, 55 insertions(+), 17 deletions(-) diff --git a/vortex-array/src/array/chunked/compute/boolean.rs b/vortex-array/src/array/chunked/compute/boolean.rs index 7011da02aa..4d8f1de30b 100644 --- a/vortex-array/src/array/chunked/compute/boolean.rs +++ b/vortex-array/src/array/chunked/compute/boolean.rs @@ -1,9 +1,9 @@ -use vortex_dtype::{DType, Nullability}; +use vortex_dtype::DType; use vortex_error::VortexResult; use crate::array::{ChunkedArray, ChunkedEncoding}; -use crate::compute::{and, and_kleene, or, or_kleene, slice, BinaryBooleanFn, BinaryOperator}; -use crate::{ArrayData, IntoArrayData}; +use crate::compute::{binary_boolean, slice, BinaryBooleanFn, BinaryOperator}; +use crate::{ArrayDType, ArrayData, IntoArrayData}; impl BinaryBooleanFn for ChunkedEncoding { fn binary_boolean( @@ -17,18 +17,48 @@ impl BinaryBooleanFn for ChunkedEncoding { for chunk in lhs.chunks() { let sliced = slice(rhs, idx, idx + chunk.len())?; - let result = match op { - BinaryOperator::And => and(&chunk, &sliced), - BinaryOperator::AndKleene => and_kleene(&chunk, &sliced), - BinaryOperator::Or => or(&chunk, &sliced), - BinaryOperator::OrKleene => or_kleene(&chunk, &sliced), - }; - chunks.push(result?); + let result = binary_boolean(&chunk, &sliced, op)?; + chunks.push(result); idx += chunk.len(); } - Ok(Some( - ChunkedArray::try_new(chunks, DType::Bool(Nullability::Nullable))?.into_array(), - )) + let nullable = lhs.dtype().is_nullable() || rhs.dtype().is_nullable(); + let dtype = DType::Bool(nullable.into()); + Ok(Some(ChunkedArray::try_new(chunks, dtype)?.into_array())) + } +} + +#[cfg(test)] +mod tests { + use vortex_dtype::{DType, Nullability}; + + use crate::array::{BoolArray, ChunkedArray}; + use crate::compute::{binary_boolean, BinaryOperator}; + use crate::{IntoArrayData, IntoArrayVariant}; + + #[test] + fn test_bin_bool_chunked() { + let arr0 = BoolArray::from_iter(vec![true, false]).into_array(); + let arr1 = BoolArray::from_iter(vec![false, false, true]).into_array(); + let chunked1 = + ChunkedArray::try_new(vec![arr0, arr1], DType::Bool(Nullability::NonNullable)).unwrap(); + + let arr2 = BoolArray::from_iter(vec![Some(false), Some(true)]).into_array(); + let arr3 = BoolArray::from_iter(vec![Some(false), None, Some(false)]).into_array(); + let chunked2 = + ChunkedArray::try_new(vec![arr2, arr3], DType::Bool(Nullability::Nullable)).unwrap(); + + assert_eq!( + binary_boolean( + &chunked1.into_array(), + &chunked2.into_array(), + BinaryOperator::Or + ) + .unwrap() + .into_bool() + .unwrap() + .boolean_buffer(), + vec![true, true, false, false, true].into() + ); } } diff --git a/vortex-array/src/array/chunked/compute/mod.rs b/vortex-array/src/array/chunked/compute/mod.rs index befdb365aa..ea9945a711 100644 --- a/vortex-array/src/array/chunked/compute/mod.rs +++ b/vortex-array/src/array/chunked/compute/mod.rs @@ -41,6 +41,7 @@ impl ComputeVTable for ChunkedEncoding { fn scalar_at_fn(&self) -> Option<&dyn ScalarAtFn> { Some(self) } + fn slice_fn(&self) -> Option<&dyn SliceFn> { Some(self) } diff --git a/vortex-array/src/compute/boolean.rs b/vortex-array/src/compute/boolean.rs index 8004493c2b..ab548fdc65 100644 --- a/vortex-array/src/compute/boolean.rs +++ b/vortex-array/src/compute/boolean.rs @@ -79,7 +79,11 @@ pub fn or_kleene( binary_boolean(lhs.as_ref(), rhs.as_ref(), BinaryOperator::OrKleene) } -fn binary_boolean(lhs: &ArrayData, rhs: &ArrayData, op: BinaryOperator) -> VortexResult { +pub fn binary_boolean( + lhs: &ArrayData, + rhs: &ArrayData, + op: BinaryOperator, +) -> VortexResult { if lhs.len() != rhs.len() { vortex_bail!("Boolean operations aren't supported on arrays of different lengths") } diff --git a/vortex-array/src/compute/mod.rs b/vortex-array/src/compute/mod.rs index f5aabddfd4..1df8dbcc22 100644 --- a/vortex-array/src/compute/mod.rs +++ b/vortex-array/src/compute/mod.rs @@ -7,7 +7,9 @@ //! implementations of these operators, else we will decode, and perform the equivalent operator //! from Arrow. -pub use boolean::{and, and_kleene, or, or_kleene, BinaryBooleanFn, BinaryOperator}; +pub use boolean::{ + and, and_kleene, binary_boolean, or, or_kleene, BinaryBooleanFn, BinaryOperator, +}; pub use cast::{try_cast, CastFn}; pub use compare::{compare, scalar_cmp, CompareFn, Operator}; pub use fill_forward::{fill_forward, FillForwardFn}; diff --git a/vortex-array/src/macros.rs b/vortex-array/src/macros.rs index 9a69549309..42863258c3 100644 --- a/vortex-array/src/macros.rs +++ b/vortex-array/src/macros.rs @@ -11,8 +11,8 @@ impl> ToArrayData for A { /// Macro to generate all the necessary code for a new type of array encoding. Including: /// 1. New Array type that implements `AsRef`, `GetArrayMetadata`, `ToArray`, `IntoArray`, and multiple useful `From`/`TryFrom` implementations. -/// 1. New Encoding type that implements `ArrayEncoding`. -/// 1. New metadata type that implements `ArrayMetadata`. +/// 2. New Encoding type that implements `ArrayEncoding`. +/// 3. New metadata type that implements `ArrayMetadata`. #[macro_export] macro_rules! impl_encoding { ($id:literal, $code:expr, $Name:ident) => { @@ -26,6 +26,7 @@ macro_rules! impl_encoding { self.0 } } + impl AsRef<$crate::ArrayData> for [<$Name Array>] { fn as_ref(&self) -> &$crate::ArrayData { &self.0