Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved performance of boolean comparison (5x-14x) (#318)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Aug 23, 2021
1 parent 43f582c commit 0b0efe3
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 86 deletions.
88 changes: 36 additions & 52 deletions benches/comparison_kernels.rs
Original file line number Diff line number Diff line change
@@ -1,64 +1,48 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#[macro_use]
extern crate criterion;
use criterion::Criterion;
use criterion::{black_box, criterion_group, criterion_main, Criterion};

use arrow2::array::*;
use arrow2::scalar::*;
use arrow2::util::bench_util::*;
use arrow2::{compute::comparison::*, datatypes::DataType, types::NativeType};
use arrow2::{compute::comparison::*, datatypes::DataType};

/// Benchmark helper: runs the `compare` kernel on two arrays with operator `op`.
///
/// Both inputs are passed through `black_box` so the optimizer cannot fold the
/// work away, and `.unwrap()` is called on the result, so a failed comparison
/// panics instead of being silently measured.
// NOTE(review): two signatures appear back to back in this span (generic
// `&PrimitiveArray<T>` and `&dyn Array`). Presumably they are the before/after
// sides of the diff rendered without +/- markers — confirm against the commit.
fn bench_op<T>(arr_a: &PrimitiveArray<T>, arr_b: &PrimitiveArray<T>, op: Operator)
where
T: NativeType,
{
compare(criterion::black_box(arr_a), criterion::black_box(arr_b), op).unwrap();
fn bench_op(arr_a: &dyn Array, arr_b: &dyn Array, op: Operator) {
compare(black_box(arr_a), black_box(arr_b), op).unwrap();
}

/// Benchmark helper: compares every element of `arr_a` against the single value
/// `value_b` using operator `op`.
///
/// The array and the scalar are both wrapped in `black_box` before the call.
// NOTE(review): two variants are interleaved here. One calls
// `primitive_compare_scalar` with a native `T` and discards the return value;
// the other calls `compare_scalar` with `&dyn Scalar` and unwraps the result.
// Presumably these are the before/after sides of the diff — confirm.
fn bench_op_scalar<T>(arr_a: &PrimitiveArray<T>, value_b: T, op: Operator)
where
T: NativeType + Simd8,
{
primitive_compare_scalar(
criterion::black_box(arr_a),
criterion::black_box(value_b),
op,
);
fn bench_op_scalar(arr_a: &dyn Array, value_b: &dyn Scalar, op: Operator) {
compare_scalar(black_box(arr_a), black_box(value_b), op).unwrap();
}

/// Registers the comparison benchmarks on the Criterion instance `c`.
// NOTE(review): this span interleaves two versions of the body. The first
// (fixed `size = 65536`, four Float32 benchmarks) is presumably the removed
// side of the diff; the `(10..=20).step_by(2)` sweep is presumably the added
// side — confirm against the commit.
fn add_benchmark(c: &mut Criterion) {
// Fixed-size variant: one pair of f32 arrays of 65536 elements.
let size = 65536;
// The two arrays differ only in their last argument (42 vs 43) —
// presumably the RNG seed, with `0.0` the null density; verify in `bench_util`.
let arr_a = create_primitive_array_with_seed::<f32>(size, DataType::Float32, 0.0, 42);
let arr_b = create_primitive_array_with_seed::<f32>(size, DataType::Float32, 0.0, 43);

c.bench_function("eq Float32", |b| {
b.iter(|| bench_op(&arr_a, &arr_b, Operator::Eq))
});
c.bench_function("eq scalar Float32", |b| {
b.iter(|| bench_op_scalar(&arr_a, 0.5, Operator::Eq))
});

c.bench_function("lt Float32", |b| {
b.iter(|| bench_op(&arr_a, &arr_b, Operator::Lt))
});
c.bench_function("lt scalar Float32", |b| {
b.iter(|| bench_op_scalar(&arr_a, 0.5, Operator::Lt))
});
// Sweep variant: log2_size takes the values 10, 12, 14, 16, 18, 20, so the
// arrays range from 2^10 to 2^20 elements.
(10..=20).step_by(2).for_each(|log2_size| {
let size = 2usize.pow(log2_size);

let arr_a = create_primitive_array_with_seed::<f32>(size, DataType::Float32, 0.0, 42);
let arr_b = create_primitive_array_with_seed::<f32>(size, DataType::Float32, 0.0, 43);

// Benchmark names embed the exponent, e.g. "f32 2^10".
c.bench_function(&format!("f32 2^{}", log2_size), |b| {
b.iter(|| bench_op(&arr_a, &arr_b, Operator::Eq))
});
c.bench_function(&format!("f32 scalar 2^{}", log2_size), |b| {
b.iter(|| {
// The scalar is built inside the timed closure, so its construction
// is part of every measured iteration.
bench_op_scalar(
&arr_a,
&PrimitiveScalar::<f32>::from(Some(0.5)),
Operator::Eq,
)
})
});

// Shadow the f32 arrays with boolean arrays of the same length.
// NOTE(review): the meaning of the `0.0` / `0.1` / `0.2` arguments is not
// visible here (presumably null density and true density) — confirm.
let arr_a = create_boolean_array(size, 0.0, 0.1);
let arr_b = create_boolean_array(size, 0.0, 0.2);

c.bench_function(&format!("bool 2^{}", log2_size), |b| {
b.iter(|| bench_op(&arr_a, &arr_b, Operator::Eq))
});
c.bench_function(&format!("bool scalar 2^{}", log2_size), |b| {
b.iter(|| bench_op_scalar(&arr_a, &BooleanScalar::from(Some(true)), Operator::Eq))
});
})
}

criterion_group!(benches, add_benchmark);
Expand Down
71 changes: 37 additions & 34 deletions src/compute/comparison/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,6 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::array::*;
use crate::bitmap::Bitmap;
use crate::buffer::MutableBuffer;
use crate::scalar::{BooleanScalar, Scalar};
use crate::{
bitmap::MutableBitmap,
Expand All @@ -27,20 +11,30 @@ use super::{super::utils::combine_validities, Operator};

/// Applies `op` to the values of two bitmaps and returns the result as a
/// [`MutableBitmap`] of length `lhs.len()`.
///
/// # Panics
///
/// Panics if `lhs` and `rhs` have different lengths.
// NOTE(review): two implementations are interleaved in this span. One maps
// `op` over individual `bool`s (`iter()` / `from_trusted_len_iter`); the other
// works on `u8` chunks and assembles a byte buffer. Presumably these are the
// removed and added sides of the diff — confirm against the commit.
pub fn compare_values_op<F>(lhs: &Bitmap, rhs: &Bitmap, op: F) -> MutableBitmap
where
F: Fn(bool, bool) -> bool,
F: Fn(u8, u8) -> u8,
{
assert_eq!(lhs.len(), rhs.len());
let lhs_iter = lhs.iter();
let rhs_iter = rhs.iter();
// Chunked form: `op` sees whole chunks rather than single bits (presumably
// `u8` chunks, inferred from the `Fn(u8, u8) -> u8` bound — confirm).
let lhs_iter = lhs.chunks();
let rhs_iter = rhs.chunks();
// The remainders are read before the iterators are consumed by `zip` below.
let lhs_remainder = lhs_iter.remainder();
let rhs_remainder = rhs_iter.remainder();

// (len + 7) / 8 rounds up to the number of bytes needed to hold `len` bits.
let mut values = MutableBuffer::with_capacity((lhs.len() + 7) / 8);
let iter = lhs_iter.zip(rhs_iter).map(|(x, y)| op(x, y));
values.extend_from_trusted_len_iter(iter);

MutableBitmap::from_trusted_len_iter(lhs_iter.zip(rhs_iter).map(|(x, y)| op(x, y)))
// A length that is not a multiple of 8 leaves a partial chunk; it is
// combined separately and appended as the final byte.
// NOTE(review): bits of that byte past `lhs.len()` hold whatever `op` yields
// for the padding bits; presumably harmless because the bitmap below is
// created with an explicit length — confirm.
if lhs.len() % 8 != 0 {
values.push(op(lhs_remainder, rhs_remainder))
};

MutableBitmap::from_buffer(values, lhs.len())
}

/// Evaluate `op(lhs, rhs)` for [`BooleanArray`]s using a specified
/// comparison function.
fn compare_op<F>(lhs: &BooleanArray, rhs: &BooleanArray, op: F) -> Result<BooleanArray>
where
F: Fn(bool, bool) -> bool,
F: Fn(u8, u8) -> u8,
{
if lhs.len() != rhs.len() {
return Err(ArrowError::InvalidArgumentError(
Expand All @@ -59,32 +53,41 @@ where
/// a specified comparison function.
///
/// The result reuses a clone of `lhs`'s validity, so slots that are null in
/// `lhs` are null in the output.
// NOTE(review): two implementations are interleaved in this span. One maps
// `op` over individual `bool`s; the other works on `u8` chunks. Presumably
// these are the removed and added sides of the diff — confirm.
pub fn compare_op_scalar<F>(lhs: &BooleanArray, rhs: bool, op: F) -> BooleanArray
where
F: Fn(bool, bool) -> bool,
F: Fn(u8, u8) -> u8,
{
let lhs_iter = lhs.values().iter();

let values = Bitmap::from_trusted_len_iter(lhs_iter.map(|x| op(x, rhs)));
let lhs_iter = lhs.values().chunks();
// Read the remainder before `map` consumes the iterator.
let lhs_remainder = lhs_iter.remainder();
// Broadcast the scalar to every bit of a byte (all ones for `true`, all
// zeros for `false`) so `op` can combine it with a chunk of `lhs` bitwise.
let rhs = if rhs { 0b11111111 } else { 0 };

// (len + 7) / 8 rounds up to the number of bytes needed to hold `len` bits.
let mut values = MutableBuffer::with_capacity((lhs.len() + 7) / 8);
let iter = lhs_iter.map(|x| op(x, rhs));
values.extend_from_trusted_len_iter(iter);

// A length that is not a multiple of 8 leaves a partial chunk; it is
// combined separately and appended as the final byte.
if lhs.len() % 8 != 0 {
values.push(op(lhs_remainder, rhs))
};
let values = MutableBitmap::from_buffer(values, lhs.len()).into();
BooleanArray::from_data(values, lhs.validity().clone())
}

/// Perform `lhs == rhs` operation on two arrays.
pub fn eq(lhs: &BooleanArray, rhs: &BooleanArray) -> Result<BooleanArray> {
// NOTE(review): presumably the pre-change closure over `bool`s, shown next to
// its replacement — confirm against the commit.
compare_op(lhs, rhs, |a, b| a == b)
// Bitwise XNOR: a result bit is 1 exactly where the bits of `a` and `b` agree.
compare_op(lhs, rhs, |a, b| !(a ^ b))
}

/// Perform `left == right` operation on an array and a scalar value.
pub fn eq_scalar(lhs: &BooleanArray, rhs: bool) -> BooleanArray {
// NOTE(review): presumably the pre-change closure over `bool`s, shown next to
// its replacement — confirm against the commit.
compare_op_scalar(lhs, rhs, |a, b| a == b)
// Bitwise XNOR: a result bit is 1 exactly where the bits of `a` and `b` agree.
compare_op_scalar(lhs, rhs, |a, b| !(a ^ b))
}

/// Perform `left != right` operation on two arrays.
pub fn neq(lhs: &BooleanArray, rhs: &BooleanArray) -> Result<BooleanArray> {
// NOTE(review): presumably the pre-change closure over `bool`s, shown next to
// its replacement — confirm against the commit.
compare_op(lhs, rhs, |a, b| a != b)
// Bitwise XOR: a result bit is 1 exactly where the bits of `a` and `b` differ.
compare_op(lhs, rhs, |a, b| a ^ b)
}

/// Perform `left != right` operation on an array and a scalar value.
pub fn neq_scalar(lhs: &BooleanArray, rhs: bool) -> BooleanArray {
// NOTE(review): presumably the pre-change closure over `bool`s, shown next to
// its replacement — confirm against the commit.
compare_op_scalar(lhs, rhs, |a, b| a != b)
// Bitwise XOR: a result bit is 1 exactly where the bits of `a` and `b` differ.
compare_op_scalar(lhs, rhs, |a, b| a ^ b)
}

/// Perform `left < right` operation on two arrays.
Expand All @@ -99,13 +102,13 @@ pub fn lt_scalar(lhs: &BooleanArray, rhs: bool) -> BooleanArray {

/// Perform `left <= right` operation on two arrays.
pub fn lt_eq(lhs: &BooleanArray, rhs: &BooleanArray) -> Result<BooleanArray> {
// NOTE(review): presumably the pre-change closure over `bool`s, shown next to
// its replacement — confirm against the commit.
compare_op(lhs, rhs, |a, b| a <= b)
// `!` binds tighter than `|`, so this is `(!a) | b`: per bit, the result is
// 0 only when `a` is 1 and `b` is 0, the one case where `a <= b` is false.
compare_op(lhs, rhs, |a, b| !a | b)
}

/// Perform `left <= right` operation on an array and a scalar value.
/// Null values are less than non-null values.
// NOTE(review): `compare_op_scalar` in this file builds its result with
// `lhs.validity().clone()`, so null slots stay null rather than being ordered;
// the "Null values are less than" sentence above may be stale — confirm.
pub fn lt_eq_scalar(lhs: &BooleanArray, rhs: bool) -> BooleanArray {
// NOTE(review): presumably the pre-change closure over `bool`s, shown next to
// its replacement — confirm against the commit.
compare_op_scalar(lhs, rhs, |a, b| a <= b)
// `!` binds tighter than `|`, so this is `(!a) | b`: per bit, the result is
// 0 only when `a` is 1 and `b` is 0, the one case where `a <= b` is false.
compare_op_scalar(lhs, rhs, |a, b| !a | b)
}

/// Perform `left > right` operation on two arrays. Non-null values are greater than null
Expand All @@ -123,13 +126,13 @@ pub fn gt_scalar(lhs: &BooleanArray, rhs: bool) -> BooleanArray {
/// Perform `left >= right` operation on two arrays. Non-null values are greater than null
/// values.
pub fn gt_eq(lhs: &BooleanArray, rhs: &BooleanArray) -> Result<BooleanArray> {
// NOTE(review): presumably the pre-change closure over `bool`s, shown next to
// its replacement — confirm against the commit.
compare_op(lhs, rhs, |a, b| a >= b)
// `a | (!b)`: per bit, the result is 0 only when `a` is 0 and `b` is 1, the
// one case where `a >= b` is false.
compare_op(lhs, rhs, |a, b| a | !b)
}

/// Perform `left >= right` operation on an array and a scalar value.
/// Non-null values are greater than null values.
// NOTE(review): `compare_op_scalar` in this file builds its result with
// `lhs.validity().clone()`, so null slots stay null rather than being ordered;
// the "Non-null values are greater than" sentence above may be stale — confirm.
pub fn gt_eq_scalar(lhs: &BooleanArray, rhs: bool) -> BooleanArray {
// NOTE(review): presumably the pre-change closure over `bool`s, shown next to
// its replacement — confirm against the commit.
compare_op_scalar(lhs, rhs, |a, b| a >= b)
// `a | (!b)`: per bit, the result is 0 only when `a` is 0 and `b` is 1, the
// one case where `a >= b` is false.
compare_op_scalar(lhs, rhs, |a, b| a | !b)
}

pub fn compare(lhs: &BooleanArray, rhs: &BooleanArray, op: Operator) -> Result<BooleanArray> {
Expand Down

0 comments on commit 0b0efe3

Please sign in to comment.