diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml index f978686c11..8ac8f83405 100644 --- a/.github/workflows/bench-pr.yml +++ b/.github/workflows/bench-pr.yml @@ -130,7 +130,7 @@ jobs: BENCH_VORTEX_RATIOS: '.*' RUSTFLAGS: '-C target-cpu=native' run: | - cargo run --bin tpch_benchmark --release -- -d gh-json -t 1 | tee tpch.json + cargo run --bin tpch_benchmark --release -- --only-vortex -d gh-json -t 1 | tee tpch.json - name: Store benchmark result if: '!cancelled()' uses: benchmark-action/github-action-benchmark@v1 @@ -179,7 +179,7 @@ jobs: RUSTFLAGS: '-C target-cpu=native' HOME: /home/ci-runner run: | - cargo run --bin clickbench --release -- -d gh-json | tee clickbench.json + cargo run --bin clickbench --release -- --only-vortex -d gh-json | tee clickbench.json - name: Store benchmark result if: '!cancelled()' uses: benchmark-action/github-action-benchmark@v1 diff --git a/bench-vortex/src/bin/clickbench.rs b/bench-vortex/src/bin/clickbench.rs index 93377cc76e..8abc2dd6b5 100644 --- a/bench-vortex/src/bin/clickbench.rs +++ b/bench-vortex/src/bin/clickbench.rs @@ -22,7 +22,7 @@ use vortex::error::vortex_panic; #[derive(Parser, Debug)] #[command(version, about, long_about = None)] struct Args { - #[arg(short, long, default_value = "8")] + #[arg(short, long, default_value = "5")] iterations: usize, #[arg(short, long)] threads: Option, diff --git a/bench-vortex/src/bin/tpch_benchmark.rs b/bench-vortex/src/bin/tpch_benchmark.rs index 0d0dbcfe03..58c8a38dae 100644 --- a/bench-vortex/src/bin/tpch_benchmark.rs +++ b/bench-vortex/src/bin/tpch_benchmark.rs @@ -25,7 +25,7 @@ struct Args { threads: Option, #[arg(short, long, default_value_t = true, default_missing_value = "true", action = ArgAction::Set)] warmup: bool, - #[arg(short, long, default_value = "8")] + #[arg(short, long, default_value = "5")] iterations: usize, #[arg(long)] only_vortex: bool, diff --git a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs index f8b85b004e..6ffa46f78e 100644 --- a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs +++ b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs @@ -8,7 +8,7 @@ use num_traits::AsPrimitive; use vortex_array::array::SparseArray; use vortex_array::compute::{ search_sorted_usize, IndexOrd, Len, SearchResult, SearchSorted, SearchSortedFn, - SearchSortedSide, + SearchSortedSide, SearchSortedUsizeFn, }; use vortex_array::stats::ArrayStatistics; use vortex_array::validity::Validity; @@ -32,24 +32,6 @@ impl SearchSortedFn for BitPackedEncoding { }) } - fn search_sorted_usize( - &self, - array: &BitPackedArray, - value: usize, - side: SearchSortedSide, - ) -> VortexResult { - match_each_unsigned_integer_ptype!(array.ptype(), |$P| { - // NOTE: conversion may truncate silently. - if let Some(pvalue) = num_traits::cast::(value) { - search_sorted_native(array, pvalue, side) - } else { - // provided u64 is too large to fit in the provided PType, value must be off - // the right end of the array. - Ok(SearchResult::NotFound(array.len())) - } - }) - } - fn search_sorted_many( &self, array: &BitPackedArray, @@ -69,6 +51,26 @@ impl SearchSortedFn for BitPackedEncoding { .try_collect() }) } +} + +impl SearchSortedUsizeFn for BitPackedEncoding { + fn search_sorted_usize( + &self, + array: &BitPackedArray, + value: usize, + side: SearchSortedSide, + ) -> VortexResult { + match_each_unsigned_integer_ptype!(array.ptype(), |$P| { + // NOTE: conversion may truncate silently. + if let Some(pvalue) = num_traits::cast::(value) { + search_sorted_native(array, pvalue, side) + } else { + // provided u64 is too large to fit in the provided PType, value must be off + // the right end of the array. + Ok(SearchResult::NotFound(array.len())) + } + }) + } fn search_sorted_usize_many( &self, @@ -121,7 +123,9 @@ where // max packed value just search the patches let usize_value: usize = value.as_(); if usize_value > array.max_packed_value() { - search_sorted_usize(&patches_array, value.as_(), side) + // FIXME(ngates): this is broken. Patches _aren't_ sorted because they're sparse and + // interspersed with nulls... + search_sorted_usize(&patches_array, usize_value, side) } else { Ok(BitPackedSearch::<'_, T>::new(array).search_sorted(&value, side)) } diff --git a/vortex-array/src/array/primitive/compute/mod.rs b/vortex-array/src/array/primitive/compute/mod.rs index 8c693784f9..2385d54112 100644 --- a/vortex-array/src/array/primitive/compute/mod.rs +++ b/vortex-array/src/array/primitive/compute/mod.rs @@ -1,7 +1,7 @@ use crate::array::PrimitiveEncoding; use crate::compute::{ - CastFn, ComputeVTable, FillForwardFn, FilterFn, ScalarAtFn, SearchSortedFn, SliceFn, - SubtractScalarFn, TakeFn, + CastFn, ComputeVTable, FillForwardFn, FilterFn, ScalarAtFn, SearchSortedFn, + SearchSortedUsizeFn, SliceFn, SubtractScalarFn, TakeFn, }; use crate::ArrayData; @@ -35,6 +35,10 @@ impl ComputeVTable for PrimitiveEncoding { Some(self) } + fn search_sorted_usize_fn(&self) -> Option<&dyn SearchSortedUsizeFn> { + Some(self) + } + fn slice_fn(&self) -> Option<&dyn SliceFn> { Some(self) } diff --git a/vortex-array/src/array/primitive/compute/search_sorted.rs b/vortex-array/src/array/primitive/compute/search_sorted.rs index 11a390d649..1373402758 100644 --- a/vortex-array/src/array/primitive/compute/search_sorted.rs +++ b/vortex-array/src/array/primitive/compute/search_sorted.rs @@ -7,7 +7,10 @@ use vortex_scalar::Scalar; use crate::array::primitive::PrimitiveArray; use crate::array::PrimitiveEncoding; -use crate::compute::{IndexOrd, Len, SearchResult, SearchSorted, SearchSortedFn, SearchSortedSide}; +use crate::compute::{ + IndexOrd, Len, SearchResult, SearchSorted, SearchSortedFn, SearchSortedSide, + SearchSortedUsizeFn, +}; use crate::validity::Validity; use crate::variants::PrimitiveArrayTrait; use crate::{ArrayDType, ArrayLen}; @@ -33,7 +36,9 @@ impl SearchSortedFn for PrimitiveEncoding { } }) } +} +impl SearchSortedUsizeFn for PrimitiveEncoding { #[allow(clippy::cognitive_complexity)] fn search_sorted_usize( &self, diff --git a/vortex-array/src/array/sparse/compute/mod.rs b/vortex-array/src/array/sparse/compute/mod.rs index 3433ed85f3..6c3c224f19 100644 --- a/vortex-array/src/array/sparse/compute/mod.rs +++ b/vortex-array/src/array/sparse/compute/mod.rs @@ -6,10 +6,11 @@ use crate::array::sparse::SparseArray; use crate::array::{PrimitiveArray, SparseEncoding}; use crate::compute::{ scalar_at, search_sorted, take, ComputeVTable, FilterFn, FilterMask, InvertFn, ScalarAtFn, - SearchResult, SearchSortedFn, SearchSortedSide, SliceFn, TakeFn, TakeOptions, + SearchResult, SearchSortedFn, SearchSortedSide, SearchSortedUsizeFn, SliceFn, TakeFn, + TakeOptions, }; use crate::variants::PrimitiveArrayTrait; -use crate::{ArrayData, IntoArrayData, IntoArrayVariant}; +use crate::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant}; mod invert; mod slice; @@ -32,6 +33,10 @@ impl ComputeVTable for SparseEncoding { Some(self) } + fn search_sorted_usize_fn(&self) -> Option<&dyn SearchSortedUsizeFn> { + Some(self) + } + fn slice_fn(&self) -> Option<&dyn SliceFn> { Some(self) } @@ -50,6 +55,7 @@ impl ScalarAtFn for SparseEncoding { } } +// FIXME(ngates): these are broken in a way that works for array patches, this will be fixed soon. impl SearchSortedFn for SparseEncoding { fn search_sorted( &self, @@ -76,6 +82,22 @@ impl SearchSortedFn for SparseEncoding { } } +// FIXME(ngates): these are broken in a way that works for array patches, this will be fixed soon. +impl SearchSortedUsizeFn for SparseEncoding { + fn search_sorted_usize( + &self, + array: &SparseArray, + value: usize, + side: SearchSortedSide, + ) -> VortexResult { + let Ok(target) = Scalar::from(value).cast(array.dtype()) else { + // If the downcast fails, then the target is too large for the dtype. + return Ok(SearchResult::NotFound(array.len())); + }; + SearchSortedFn::search_sorted(self, array, &target, side) + } +} + impl FilterFn for SparseEncoding { fn filter(&self, array: &SparseArray, mask: FilterMask) -> VortexResult { let buffer = mask.to_boolean_buffer()?; diff --git a/vortex-array/src/compute/mod.rs b/vortex-array/src/compute/mod.rs index 1df8dbcc22..a34eb911ea 100644 --- a/vortex-array/src/compute/mod.rs +++ b/vortex-array/src/compute/mod.rs @@ -102,6 +102,13 @@ pub trait ComputeVTable { None } + /// Perform a search over an ordered array. + /// + /// See: [SearchSortedUsizeFn]. + fn search_sorted_usize_fn(&self) -> Option<&dyn SearchSortedUsizeFn> { + None + } + /// Perform zero-copy slicing of an array. /// /// See: [SliceFn]. diff --git a/vortex-array/src/compute/search_sorted.rs b/vortex-array/src/compute/search_sorted.rs index 3be8733072..a7d9ccc98a 100644 --- a/vortex-array/src/compute/search_sorted.rs +++ b/vortex-array/src/compute/search_sorted.rs @@ -106,16 +106,6 @@ pub trait SearchSortedFn { side: SearchSortedSide, ) -> VortexResult; - fn search_sorted_usize( - &self, - array: &Array, - value: usize, - side: SearchSortedSide, - ) -> VortexResult { - let usize_scalar = Scalar::from(value); - self.search_sorted(array, &usize_scalar, side) - } - /// Bulk search for many values. fn search_sorted_many( &self, @@ -128,6 +118,15 @@ pub trait SearchSortedFn { .map(|value| self.search_sorted(array, value, side)) .try_collect() } +} + +pub trait SearchSortedUsizeFn { + fn search_sorted_usize( + &self, + array: &Array, + value: usize, + side: SearchSortedSide, + ) -> VortexResult; fn search_sorted_usize_many( &self, @@ -162,34 +161,40 @@ where SearchSortedFn::search_sorted(encoding, array_ref, value, side) } - fn search_sorted_usize( + fn search_sorted_many( &self, array: &ArrayData, - value: usize, + values: &[Scalar], side: SearchSortedSide, - ) -> VortexResult { + ) -> VortexResult> { let array_ref = <&E::Array>::try_from(array)?; let encoding = array .encoding() .as_any() .downcast_ref::() .ok_or_else(|| vortex_err!("Mismatched encoding"))?; - SearchSortedFn::search_sorted_usize(encoding, array_ref, value, side) + SearchSortedFn::search_sorted_many(encoding, array_ref, values, side) } +} - fn search_sorted_many( +impl SearchSortedUsizeFn for E +where + E: SearchSortedUsizeFn, + for<'a> &'a E::Array: TryFrom<&'a ArrayData, Error = VortexError>, +{ + fn search_sorted_usize( &self, array: &ArrayData, - values: &[Scalar], + value: usize, side: SearchSortedSide, - ) -> VortexResult> { + ) -> VortexResult { let array_ref = <&E::Array>::try_from(array)?; let encoding = array .encoding() .as_any() .downcast_ref::() .ok_or_else(|| vortex_err!("Mismatched encoding"))?; - SearchSortedFn::search_sorted_many(encoding, array_ref, values, side) + SearchSortedUsizeFn::search_sorted_usize(encoding, array_ref, value, side) } fn search_sorted_usize_many( @@ -204,7 +209,7 @@ where .as_any() .downcast_ref::() .ok_or_else(|| vortex_err!("Mismatched encoding"))?; - SearchSortedFn::search_sorted_usize_many(encoding, array_ref, values, side) + SearchSortedUsizeFn::search_sorted_usize_many(encoding, array_ref, values, side) } } @@ -214,8 +219,8 @@ pub fn search_sorted>( side: SearchSortedSide, ) -> VortexResult { let Ok(scalar) = target.into().cast(array.dtype()) else { - // If the cast fails, then the search value must be higher than the highest value in - // the array. + // Try to downcast the usize ot the array type, if the downcast fails, then we know the + // usize is too large and the value is greater than the highest value in the array. return Ok(SearchResult::NotFound(array.len())); }; @@ -243,14 +248,28 @@ pub fn search_sorted_usize( target: usize, side: SearchSortedSide, ) -> VortexResult { - if let Some(f) = array.encoding().search_sorted_fn() { + if let Some(f) = array.encoding().search_sorted_usize_fn() { return f.search_sorted_usize(array, target, side); } - // Fallback to a generic search_sorted using scalar_at + // Otherwise, convert the target into a scalar to try the search_sorted_fn + let Ok(target) = Scalar::from(target).cast(array.dtype()) else { + return Ok(SearchResult::NotFound(array.len())); + }; + + // Try the non-usize search sorted + if let Some(f) = array.encoding().search_sorted_fn() { + return f.search_sorted(array, &target, side); + } + + // Or fallback all the way to a generic search_sorted using scalar_at if array.encoding().scalar_at_fn().is_some() { - let scalar = Scalar::primitive(target as u64, array.dtype().nullability()); - return Ok(SearchSorted::search_sorted(array, &scalar, side)); + // Try to downcast the usize to the array type, if the downcast fails, then we know the + // usize is too large and the value is greater than the highest value in the array. + let Ok(target) = target.cast(array.dtype()) else { + return Ok(SearchResult::NotFound(array.len())); + }; + return Ok(SearchSorted::search_sorted(array, &target, side)); } vortex_bail!( @@ -287,7 +306,7 @@ pub fn search_sorted_usize_many( targets: &[usize], side: SearchSortedSide, ) -> VortexResult> { - if let Some(f) = array.encoding().search_sorted_fn() { + if let Some(f) = array.encoding().search_sorted_usize_fn() { return f.search_sorted_usize_many(array, targets, side); } @@ -299,6 +318,8 @@ pub fn search_sorted_usize_many( } pub trait IndexOrd { + /// PartialOrd of the value at index `idx` with `elem`. + /// For example, if self\[idx\] > elem, return Some(Greater). fn index_cmp(&self, idx: usize, elem: &V) -> Option; fn index_lt(&self, idx: usize, elem: &V) -> bool {