Skip to content

Commit

Permalink
feat: split computation of primitive statistics (#1306)
Browse files (browse the repository at this point in the history)
  • Loading branch information
lwwmanning authored Nov 15, 2024
1 parent fed92df commit 7585509
Show file tree
Hide file tree
Showing 10 changed files with 226 additions and 167 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ impl<T: BitPacking + NativePType> IndexOrd<T> for BitPackedSearch<'_, T> {
idx + self.offset as usize,
)
};
Some(val.compare(*elem))
Some(val.total_compare(*elem))
}
}

Expand Down
46 changes: 1 addition & 45 deletions encodings/runend/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@ use std::cmp::min;
use itertools::Itertools;
use num_traits::{AsPrimitive, FromPrimitive};
use vortex_array::array::PrimitiveArray;
use vortex_array::compute::unary::scalar_at;
use vortex_array::stats::{ArrayStatistics, Stat};
use vortex_array::validity::Validity;
use vortex_array::variants::PrimitiveArrayTrait;
use vortex_array::ArrayDType;
use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype, NativePType, Nullability};
use vortex_error::{vortex_panic, VortexResult, VortexUnwrap as _};
use vortex_error::{vortex_panic, VortexResult};

pub fn runend_encode(array: &PrimitiveArray) -> (PrimitiveArray, PrimitiveArray) {
let validity = if array.dtype().nullability() == Nullability::NonNullable {
Expand All @@ -23,48 +21,6 @@ pub fn runend_encode(array: &PrimitiveArray) -> (PrimitiveArray, PrimitiveArray)
(PrimitiveArray::from_vec(ends, Validity::NonNullable), PrimitiveArray::from_vec(values, validity))
});

// the values array stats are trivially derived
compressed_values
.statistics()
.set(Stat::RunCount, compressed_values.len().into());
compressed_values
.statistics()
.set(Stat::IsConstant, (compressed_values.len() == 1).into());
if let Some(min) = array.statistics().get(Stat::Min) {
compressed_values.statistics().set(Stat::Min, min);
}
if let Some(max) = array.statistics().get(Stat::Max) {
compressed_values.statistics().set(Stat::Max, max);
}
if let Some(is_sorted) = array.statistics().get(Stat::IsSorted) {
compressed_values
.statistics()
.set(Stat::IsSorted, is_sorted);
}
if let Some(is_strict_sorted) = array.statistics().get(Stat::IsStrictSorted) {
compressed_values
.statistics()
.set(Stat::IsStrictSorted, is_strict_sorted);
}

compressed_ends
.statistics()
.set(Stat::IsConstant, (compressed_ends.len() == 1).into());
compressed_ends
.statistics()
.set(Stat::IsSorted, true.into());
compressed_ends
.statistics()
.set(Stat::IsStrictSorted, true.into());
if !compressed_ends.is_empty() {
compressed_ends
.statistics()
.set(Stat::Min, scalar_at(&compressed_ends, 0).vortex_unwrap());
compressed_ends
.statistics()
.set(Stat::Max, (array.len() as u64).into());
}

assert_eq!(array.dtype(), compressed_values.dtype());
(compressed_ends, compressed_values)
}
Expand Down
1 change: 0 additions & 1 deletion encodings/zigzag/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ pub fn zigzag_decode(parray: PrimitiveArray) -> VortexResult<PrimitiveArray> {
parray.ptype()
),
};
decoded.inherit_statistics(parray.statistics());
Ok(decoded)
}

Expand Down
2 changes: 1 addition & 1 deletion fuzz/src/search_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl<T: NativePType> IndexOrd<Option<T>> for SearchPrimitiveSlice<T> {
// SAFETY: Used in search_sorted_by same as the standard library. The search_sorted ensures idx is in bounds
match unsafe { self.0.get_unchecked(idx) } {
None => Some(Ordering::Greater),
Some(i) => Some(i.compare(*v)),
Some(i) => Some(i.total_compare(*v)),
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion fuzz/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ pub fn sort_canonical_array(array: &Array) -> Array {

fn sort_primitive_slice<T: NativePType>(s: &mut [Option<T>]) {
s.sort_by(|a, b| match (a, b) {
(Some(v), Some(w)) => v.compare(*w),
(Some(v), Some(w)) => v.total_compare(*w),
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Greater,
(Some(_), None) => Ordering::Less,
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/primitive/compute/search_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ impl<'a, T: NativePType> SearchSortedPrimitive<'a, T> {
impl<T: NativePType> IndexOrd<T> for SearchSortedPrimitive<'_, T> {
fn index_cmp(&self, idx: usize, elem: &T) -> Option<Ordering> {
// SAFETY: Used in search_sorted_by same as the standard library. The search_sorted ensures idx is in bounds
Some(unsafe { self.values.get_unchecked(idx) }.compare(*elem))
Some(unsafe { self.values.get_unchecked(idx) }.total_compare(*elem))
}
}

Expand Down
Loading

0 comments on commit 7585509

Please sign in to comment.