Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Faster take with null values (2-3x) (#633)
Browse files: browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Nov 25, 2021
1 parent e9a6c3e commit c3222aa
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 86 deletions.
11 changes: 10 additions & 1 deletion benches/take_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| bench_take(&values, &indices))
});

let values_nulls = create_boolean_array(size, 0.2, 0.5);
c.bench_function(&format!("take bool values nulls 2^{}", log2_size), |b| {
b.iter(|| bench_take(&values_nulls, &indices))
});

c.bench_function(&format!("take bool nulls 2^{}", log2_size), |b| {
b.iter(|| bench_take(&values, &indices_nulls))
});
Expand All @@ -63,10 +68,14 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| bench_take(&values, &indices))
});

let values = create_string_array::<i32>(512, 4, 0.0, 42);
c.bench_function(&format!("take str nulls 2^{}", log2_size), |b| {
b.iter(|| bench_take(&values, &indices_nulls))
});

let values_nulls = create_string_array::<i32>(size, 4, 0.2, 42);
c.bench_function(&format!("take str values nulls 2^{}", log2_size), |b| {
b.iter(|| bench_take(&values_nulls, &indices))
});
});

let values = create_string_array::<i32>(512, 4, 0.0, 42);
Expand Down
36 changes: 7 additions & 29 deletions src/compute/take/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,3 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::{
array::{Array, BooleanArray, PrimitiveArray},
bitmap::{Bitmap, MutableBitmap},
Expand All @@ -35,21 +18,16 @@ fn take_values_validity<I: Index>(
values: &BooleanArray,
indices: &[I],
) -> (Bitmap, Option<Bitmap>) {
let mut validity = MutableBitmap::with_capacity(indices.len());

let validity_values = values.validity().unwrap();
let validity = indices
.iter()
.map(|index| validity_values.get_bit(index.to_usize()));
let validity = Bitmap::from_trusted_len_iter(validity);

let values_values = values.values();

let values = indices.iter().map(|index| {
let index = index.to_usize();
if validity_values.get_bit(index) {
validity.push(true);
} else {
validity.push(false);
}
values_values.get_bit(index)
});
let values = indices
.iter()
.map(|index| values_values.get_bit(index.to_usize()));
let buffer = Bitmap::from_trusted_len_iter(values);

(buffer, validity.into())
Expand Down
36 changes: 9 additions & 27 deletions src/compute/take/generic_binary.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,3 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use crate::{
array::{GenericBinaryArray, Offset, PrimitiveArray},
bitmap::{Bitmap, MutableBitmap},
Expand Down Expand Up @@ -66,25 +50,23 @@ pub fn take_values_validity<O: Offset, I: Index, A: GenericBinaryArray<O>>(
values: &A,
indices: &[I],
) -> (Buffer<O>, Buffer<u8>, Option<Bitmap>) {
let validity_values = values.validity().unwrap();
let validity = indices
.iter()
.map(|index| validity_values.get_bit(index.to_usize()));
let validity = Bitmap::from_trusted_len_iter(validity);

let mut length = O::default();
let mut validity = MutableBitmap::with_capacity(indices.len());

let null_values = values.validity().unwrap();
let offsets = values.offsets();
let values_values = values.values();

let mut starts = MutableBuffer::<O>::with_capacity(indices.len());
let offsets = indices.iter().map(|index| {
let index = index.to_usize();
if null_values.get_bit(index) {
validity.push(true);
let start = offsets[index];
length += offsets[index + 1] - start;
starts.push(start);
} else {
validity.push(false);
starts.push(O::default());
}
let start = offsets[index];
length += offsets[index + 1] - start;
starts.push(start);
length
});
let offsets = std::iter::once(O::default()).chain(offsets);
Expand Down
36 changes: 7 additions & 29 deletions src/compute/take/primitive.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,3 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use crate::{
array::{Array, PrimitiveArray},
bitmap::{Bitmap, MutableBitmap},
Expand All @@ -39,24 +23,18 @@ fn take_values_validity<T: NativeType, I: Index>(
values: &PrimitiveArray<T>,
indices: &[I],
) -> (Buffer<T>, Option<Bitmap>) {
let mut null = MutableBitmap::with_capacity(indices.len());
let values_validity = values.validity().unwrap();

let null_values = values.validity().unwrap();
let validity = indices
.iter()
.map(|index| values_validity.get_bit(index.to_usize()));
let validity = MutableBitmap::from_trusted_len_iter(validity);

let values_values = values.values();

let values = indices.iter().map(|index| {
let index = index.to_usize();
if null_values.get_bit(index) {
null.push(true);
} else {
null.push(false);
}
values_values[index]
});
let values = indices.iter().map(|index| values_values[index.to_usize()]);
let buffer = MutableBuffer::from_trusted_len_iter(values);

(buffer.into(), null.into())
(buffer.into(), validity.into())
}

// take implementation when only indices contain nulls
Expand Down

0 comments on commit c3222aa

Please sign in to comment.