-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add fill_null compute function (#1590)
Co-authored-by: Will Manning <[email protected]>
- Loading branch information
1 parent
bb83f9b
commit c774557
Showing
13 changed files
with
216 additions
and
75 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
use vortex_error::{vortex_err, VortexResult}; | ||
use vortex_scalar::Scalar; | ||
|
||
use crate::array::{BoolArray, BoolEncoding, ConstantArray}; | ||
use crate::compute::FillNullFn; | ||
use crate::validity::Validity; | ||
use crate::{ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant}; | ||
|
||
impl FillNullFn<BoolArray> for BoolEncoding { | ||
fn fill_null(&self, array: &BoolArray, fill_value: Scalar) -> VortexResult<ArrayData> { | ||
let fill = fill_value | ||
.as_bool() | ||
.value() | ||
.ok_or_else(|| vortex_err!("Fill value must be non null"))?; | ||
|
||
Ok(match array.validity() { | ||
Validity::NonNullable => array.clone().into_array(), | ||
Validity::AllValid => BoolArray::from(array.boolean_buffer()).into_array(), | ||
Validity::AllInvalid => ConstantArray::new(fill, array.len()).into_array(), | ||
Validity::Array(v) => { | ||
let bool_buffer = if fill { | ||
&array.boolean_buffer() | &!&v.into_bool()?.boolean_buffer() | ||
} else { | ||
&array.boolean_buffer() & &v.into_bool()?.boolean_buffer() | ||
}; | ||
BoolArray::from(bool_buffer).into_array() | ||
} | ||
}) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use arrow_buffer::BooleanBuffer;
    use rstest::rstest;
    use vortex_dtype::{DType, Nullability};

    use crate::array::BoolArray;
    use crate::compute::fill_null;
    use crate::validity::Validity;
    use crate::{ArrayDType, IntoArrayVariant};

    /// Filling a partially-valid boolean array must keep valid values, substitute the
    /// fill value at invalid positions, and yield a non-nullable dtype.
    #[rstest]
    #[case(true, vec![true, true, false, true])]
    #[case(false, vec![true, false, false, false])]
    fn bool_fill_null(#[case] fill_value: bool, #[case] expected: Vec<bool>) {
        let values = BooleanBuffer::from_iter([true, true, false, false]);
        let validity = Validity::from_iter([true, false, true, false]);
        let input = BoolArray::try_new(values, validity).unwrap();

        let filled = fill_null(input, fill_value.into())
            .unwrap()
            .into_bool()
            .unwrap();

        let actual: Vec<bool> = filled.boolean_buffer().iter().collect();
        assert_eq!(actual, expected);
        assert_eq!(filled.dtype(), &DType::Bool(Nullability::NonNullable));
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
use vortex_error::VortexResult; | ||
use vortex_scalar::Scalar; | ||
|
||
use crate::array::{ChunkedArray, ChunkedEncoding}; | ||
use crate::compute::{fill_null, FillNullFn}; | ||
use crate::{ArrayDType, ArrayData, IntoArrayData}; | ||
|
||
impl FillNullFn<ChunkedArray> for ChunkedEncoding { | ||
fn fill_null(&self, array: &ChunkedArray, fill_value: Scalar) -> VortexResult<ArrayData> { | ||
ChunkedArray::try_new( | ||
array | ||
.chunks() | ||
.map(|c| fill_null(c, fill_value.clone())) | ||
.collect::<VortexResult<Vec<_>>>()?, | ||
array.dtype().as_nonnullable(), | ||
) | ||
.map(|a| a.into_array()) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use arrow_buffer::BooleanBuffer;
    use vortex_dtype::{DType, Nullability};

    use crate::array::{BoolArray, ChunkedArray};
    use crate::compute::fill_null;
    use crate::validity::Validity;
    use crate::{ArrayDType, IntoArrayData};

    /// Filling a nullable chunked boolean array must produce a non-nullable dtype.
    #[test]
    fn fill_null_chunks() {
        let invalid_chunk = BoolArray::try_new(BooleanBuffer::new_set(5), Validity::AllInvalid)
            .unwrap()
            .into_array();
        let nullable_chunk =
            BoolArray::new(BooleanBuffer::new_set(5), Nullability::Nullable).into_array();

        let chunked = ChunkedArray::try_new(
            vec![invalid_chunk, nullable_chunk],
            DType::Bool(Nullability::Nullable),
        )
        .unwrap();

        let filled = fill_null(chunked, false.into()).unwrap();
        assert_eq!(filled.dtype(), &DType::Bool(Nullability::NonNullable));
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; | ||
use vortex_scalar::Scalar; | ||
|
||
use crate::encoding::Encoding; | ||
use crate::{ArrayDType, ArrayData}; | ||
|
||
/// Implementation of fill_null for an encoding. | ||
/// | ||
/// The `Array` type parameter is the array type the implementing encoding operates on. | ||
/// | ||
/// Precondition (a logical contract, not a memory-safety one; nothing here is `unsafe`): | ||
/// the fill value is guaranteed to be non-null. | ||
pub trait FillNullFn<Array> { | ||
/// Fills `array` using `fill_value`, returning the resulting array or an error. | ||
fn fill_null(&self, array: &Array, fill_value: Scalar) -> VortexResult<ArrayData>; | ||
} | ||
|
||
impl<E: Encoding> FillNullFn<ArrayData> for E | ||
where | ||
E: FillNullFn<E::Array>, | ||
for<'a> &'a E::Array: TryFrom<&'a ArrayData, Error = VortexError>, | ||
{ | ||
fn fill_null(&self, array: &ArrayData, fill_value: Scalar) -> VortexResult<ArrayData> { | ||
let array_ref = <&E::Array>::try_from(array)?; | ||
let encoding = array | ||
.encoding() | ||
.as_any() | ||
.downcast_ref::<E>() | ||
.ok_or_else(|| vortex_err!("Mismatched encoding"))?; | ||
FillNullFn::fill_null(encoding, array_ref, fill_value) | ||
} | ||
} | ||
|
||
pub fn fill_null(array: impl AsRef<ArrayData>, fill_value: Scalar) -> VortexResult<ArrayData> { | ||
let array = array.as_ref(); | ||
if !array.dtype().is_nullable() { | ||
return Ok(array.clone()); | ||
} | ||
|
||
if fill_value.is_null() { | ||
vortex_bail!("Cannot fill_null with a null value") | ||
} | ||
|
||
if !array.dtype().eq_ignore_nullability(fill_value.dtype()) { | ||
vortex_bail!(MismatchedTypes: array.dtype(), fill_value.dtype()) | ||
} | ||
|
||
array | ||
.encoding() | ||
.fill_null_fn() | ||
.map(|f| f.fill_null(array, fill_value)) | ||
.unwrap_or_else(|| Err(vortex_err!(NotImplemented: "fill_null", array.encoding().id()))) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.