Skip to content

Commit

Permalink
chore: minor benchmark improvements (#1310)
Browse files — browse the repository at this point in the history
  • Loading branch information
lwwmanning authored Nov 15, 2024
1 parent 38e9860 commit 2940ac9
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 3 deletions.
20 changes: 20 additions & 0 deletions bench-vortex/benches/compress_noci.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
mod tokio_runtime;

use core::str::FromStr;
use core::sync::atomic::{AtomicBool, Ordering};
use std::io::Cursor;
use std::path::Path;
use std::sync::Arc;
Expand All @@ -16,11 +18,13 @@ use bench_vortex::tpch::dbgen::{DBGen, DBGenOptions};
use bench_vortex::{fetch_taxi_data, tpch};
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use futures::TryStreamExt;
use log::LevelFilter;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use parquet::arrow::ArrowWriter;
use parquet::basic::{Compression, ZstdLevel};
use parquet::file::properties::WriterProperties;
use regex::Regex;
use simplelog::*;
use tokio::runtime::Runtime;
use vortex::array::{ChunkedArray, StructArray};
use vortex::buffer::Buffer;
Expand All @@ -41,6 +45,8 @@ struct GenericBenchmarkResults<'a> {
range: f64,
}

/// Guards one-time logger setup: `benchmark_compress` atomically swaps this to
/// `true`, and only the call that observes the previous value `false` goes on
/// to initialize `TermLogger`.
static LOG_INITIALIZED: AtomicBool = AtomicBool::new(false);

fn ensure_dir_exists(dir: &str) -> std::io::Result<()> {
let path = Path::new(dir);
if !path.exists() {
Expand Down Expand Up @@ -164,6 +170,20 @@ fn benchmark_compress<F, U>(
F: Fn() -> U,
U: AsRef<Array>,
{
// if no logging is enabled, enable it
if !LOG_INITIALIZED.swap(true, Ordering::SeqCst) {
TermLogger::init(
env::var("RUST_LOG")
.ok()
.and_then(|s| LevelFilter::from_str(&s).ok())
.unwrap_or(LevelFilter::Off),
Config::default(),
TerminalMode::Mixed,
ColorChoice::Auto,
)
.unwrap();
}

ensure_dir_exists("benchmarked-files").unwrap();
let runtime = &TOKIO_RUNTIME;
let uncompressed = make_uncompressed();
Expand Down
56 changes: 53 additions & 3 deletions bench-vortex/benches/compressor_throughput.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion, Throughput};
use itertools::Itertools as _;
use mimalloc::MiMalloc;
use rand::{Rng, SeedableRng as _};
use rand::distributions::Alphanumeric;
use rand::seq::SliceRandom as _;
use rand::{thread_rng, Rng, SeedableRng as _};
use vortex::aliases::hash_set::HashSet;
use vortex::array::PrimitiveArray;
use vortex::array::{PrimitiveArray, VarBinViewArray};
use vortex::compute::unary::try_cast;
use vortex::dict::{dict_encode_varbinview, DictArray};
use vortex::dtype::PType;
use vortex::fsst::{fsst_compress, fsst_train_compressor};
use vortex::sampling_compressor::compressors::alp::ALPCompressor;
use vortex::sampling_compressor::compressors::alp_rd::ALPRDCompressor;
use vortex::sampling_compressor::compressors::bitpacked::{
Expand Down Expand Up @@ -92,5 +96,51 @@ fn primitive(c: &mut Criterion) {
}
}

criterion_group!(benches, primitive);
/// Benchmarks decoding dictionary- and FSST-encoded string arrays back to
/// canonical form.
///
/// A `VarBinViewArray` is built from 1,000,000 words produced by
/// `gen_varbin_words` with a uniqueness ratio of `0.00005`, then encoded once
/// with each scheme outside the timed region. Each benchmark measures only the
/// `into_canonical` call; the per-iteration setup (constructing the `DictArray`
/// or cloning the FSST array) runs in the `iter_batched` setup closure.
///
/// Throughput is reported in bytes of the uncompressed array (`nbytes`).
fn strings(c: &mut Criterion) {
    let mut group = c.benchmark_group("string-decompression");

    let varbinview_arr = VarBinViewArray::from_iter_str(gen_varbin_words(1_000_000, 0.00005));
    let (codes, values) = dict_encode_varbinview(&varbinview_arr);

    // Convert to a generic array once and reuse it for the throughput
    // measurement and for FSST training/compression below.
    let uncompressed = varbinview_arr.clone().into_array();

    // Single throughput setting for the whole group: the size of the
    // uncompressed input that decoding has to reproduce.
    group.throughput(Throughput::Bytes(uncompressed.nbytes() as u64));

    group.bench_function("dict_decode_varbinview", |b| {
        b.iter_batched(
            || DictArray::try_new(codes.clone().into_array(), values.clone().into_array()).unwrap(),
            |dict_arr| black_box(dict_arr.into_canonical().unwrap()),
            BatchSize::SmallInput,
        );
    });

    let fsst_compressor = fsst_train_compressor(&uncompressed).unwrap();
    let fsst_array = fsst_compress(&uncompressed, &fsst_compressor).unwrap();
    group.bench_function("fsst_decompress_varbinview", |b| {
        b.iter_batched(
            || fsst_array.clone(),
            |fsst_arr| black_box(fsst_arr.into_canonical().unwrap()),
            BatchSize::SmallInput,
        );
    });
}

/// Generates `len` random words, each picked from a dictionary of random
/// 8-character alphanumeric strings.
///
/// `uniqueness` is the desired ratio of distinct words to `len`: the
/// dictionary holds `len * uniqueness` entries (truncated towards zero), but
/// never fewer than one. Words are drawn from the dictionary with replacement,
/// so the number of distinct words in the result is at most the dictionary
/// size.
///
/// The generator is `thread_rng()`, so the output differs between runs.
fn gen_varbin_words(len: usize, uniqueness: f64) -> Vec<String> {
    let mut rng = thread_rng();
    // `choose` returns `None` for an empty slice, so a product that truncates
    // to zero (e.g. len = 10, uniqueness = 0.00005) used to panic below.
    // Always keep at least one dictionary entry.
    let uniq_cnt = ((len as f64 * uniqueness) as usize).max(1);
    let dict: Vec<String> = (0..uniq_cnt)
        .map(|_| {
            (&mut rng)
                .sample_iter(&Alphanumeric)
                .take(8)
                .map(char::from)
                .collect()
        })
        .collect();
    (0..len)
        .map(|_| {
            dict.choose(&mut rng)
                .expect("dictionary always holds at least one word")
                .clone()
        })
        .collect()
}

// Register the `primitive` and `strings` benchmark functions under the
// `benches` group and hand that group to criterion's entry-point macro.
criterion_group!(benches, primitive, strings);
criterion_main!(benches);

0 comments on commit 2940ac9

Please sign in to comment.