Skip to content

Commit

Permalink
Add eq benchmark for StringArray/StringViewArray (apache#5924)
Browse files Browse the repository at this point in the history
* add neq/eq benchmark for String/ViewArray

* move bench to comparison kernel

* clean unnecessary dep

* make clippy happy
  • Loading branch information
XiangpengHao authored Jun 21, 2024
1 parent 7ef6be4 commit 13c9e90
Showing 1 changed file with 51 additions and 0 deletions.
51 changes: 51 additions & 0 deletions arrow/benches/comparison_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#[macro_use]
extern crate criterion;
use arrow::util::test_util::seedable_rng;
use criterion::Criterion;

extern crate arrow;
Expand All @@ -27,6 +28,8 @@ use arrow::{array::*, datatypes::Float32Type, datatypes::Int32Type};
use arrow_buffer::IntervalMonthDayNano;
use arrow_string::like::*;
use arrow_string::regexp::regexp_is_match_utf8_scalar;
use rand::rngs::StdRng;
use rand::Rng;

const SIZE: usize = 65536;

Expand Down Expand Up @@ -55,6 +58,14 @@ fn bench_regexp_is_match_utf8_scalar(arr_a: &StringArray, value_b: &str) {
.unwrap();
}

/// Lazily yields `size` randomly generated strings, each wrapped in `Some`.
///
/// For every item a length is drawn from `0..64`, then that many bytes are
/// drawn from `0..128`. All such bytes are ASCII, so the UTF-8 conversion
/// cannot fail. The returned iterator holds the mutable borrow of `rng` for
/// as long as it is alive, and draws from it only as items are consumed.
fn make_string_array(size: usize, rng: &mut StdRng) -> impl Iterator<Item = Option<String>> + '_ {
    std::iter::repeat_with(move || {
        // Draw the length first, then the bytes, one RNG call per byte.
        let len = rng.gen_range(0..64);
        let mut ascii = Vec::new();
        for _ in 0..len {
            ascii.push(rng.gen_range(0..128));
        }
        // Every byte is below 128, i.e. valid single-byte UTF-8.
        Some(String::from_utf8(ascii).unwrap())
    })
    .take(size)
}

fn add_benchmark(c: &mut Criterion) {
let arr_a = create_primitive_array_with_seed::<Float32Type>(SIZE, 0.0, 42);
let arr_b = create_primitive_array_with_seed::<Float32Type>(SIZE, 0.0, 43);
Expand All @@ -63,6 +74,7 @@ fn add_benchmark(c: &mut Criterion) {
let arr_month_day_nano_b = create_month_day_nano_array_with_seed(SIZE, 0.0, 43);

let arr_string = create_string_array::<i32>(SIZE, 0.0);

let scalar = Float32Array::from(vec![1.0]);

c.bench_function("eq Float32", |b| b.iter(|| eq(&arr_a, &arr_b)));
Expand Down Expand Up @@ -138,6 +150,45 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| eq(&arr_month_day_nano_b, &scalar).unwrap())
});

let mut rng = seedable_rng();
let mut array_gen = make_string_array(1024 * 1024 * 8, &mut rng);
let string_left = StringArray::from_iter(array_gen);
let string_view_left = StringViewArray::from_iter(string_left.iter());

// Reuse the same rng (rather than re-seeding) so that the right-hand arrays contain
// **different** data; otherwise the left and right arrays would be identical.
array_gen = make_string_array(1024 * 1024 * 8, &mut rng);
let string_right = StringArray::from_iter(array_gen);
let string_view_right = StringViewArray::from_iter(string_right.iter());

c.bench_function("eq scalar StringArray", |b| {
b.iter(|| {
eq(
&Scalar::new(StringArray::from_iter_values(["xxxx"])),
&string_left,
)
.unwrap()
})
});

c.bench_function("eq scalar StringViewArray", |b| {
b.iter(|| {
eq(
&Scalar::new(StringViewArray::from_iter_values(["xxxx"])),
&string_view_left,
)
.unwrap()
})
});

c.bench_function("eq StringArray StringArray", |b| {
b.iter(|| eq(&string_left, &string_right).unwrap())
});

c.bench_function("eq StringViewArray StringViewArray", |b| {
b.iter(|| eq(&string_view_left, &string_view_right).unwrap())
});

c.bench_function("like_utf8 scalar equals", |b| {
b.iter(|| bench_like_utf8_scalar(&arr_string, "xxxx"))
});
Expand Down

0 comments on commit 13c9e90

Please sign in to comment.