Skip to content

Commit

Permalink
Undo run end filter performance regression (#6691)
Browse files Browse the repository at this point in the history
* ensure predicate and values have the same length before passing on to filter_run_end_array

* fix error wording

* have filter_run_end_array use filter array with run_ends max value size

* use skip and take to iterate over filter values in filter_run_end_array

* check array values in max_value_gt_predicate_len test

* run end filter performance regression

* use names consistent with other functions

* clippy
  • Loading branch information
delamarch3 authored Nov 10, 2024
1 parent 5ad621f commit 24f455e
Showing 1 changed file with 12 additions and 12 deletions.
24 changes: 12 additions & 12 deletions arrow-select/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -423,30 +423,30 @@ fn filter_array(values: &dyn Array, predicate: &FilterPredicate) -> Result<Array

/// Filter any supported [`RunArray`] based on a [`FilterPredicate`]
fn filter_run_end_array<R: RunEndIndexType>(
re_arr: &RunArray<R>,
pred: &FilterPredicate,
array: &RunArray<R>,
predicate: &FilterPredicate,
) -> Result<RunArray<R>, ArrowError>
where
R::Native: Into<i64> + From<bool>,
R::Native: AddAssign,
{
let run_ends: &RunEndBuffer<R::Native> = re_arr.run_ends();
let run_ends: &RunEndBuffer<R::Native> = array.run_ends();
let mut values_filter = BooleanBufferBuilder::new(run_ends.len());
let mut new_run_ends = vec![R::default_value(); run_ends.len()];

let mut start = 0i64;
let mut start = 0u64;
let mut i = 0;
let mut count = R::default_value();
let filter_values = pred.filter.values();
let filter_values = predicate.filter.values();

for end in run_ends.inner().into_iter().map(|i| (*i).into()) {
for mut end in run_ends.inner().into_iter().map(|i| (*i).into() as u64) {
let mut keep = false;

for pred in filter_values
.iter()
.skip(start as usize)
.take((end - start) as usize)
{
let difference = end.saturating_sub(filter_values.len() as u64);
end -= difference;

// Safety: we subtract the difference off `end` so we are always within bounds
for pred in (start..end).map(|i| unsafe { filter_values.value_unchecked(i as usize) }) {
count += R::Native::from(pred);
keep |= pred
}
Expand All @@ -465,7 +465,7 @@ where
new_run_ends.clear();
}

let values = re_arr.values();
let values = array.values();
let pred = BooleanArray::new(values_filter.finish(), None);
let values = filter(&values, &pred)?;

Expand Down

0 comments on commit 24f455e

Please sign in to comment.