Update default data source in cuio reader benchmarks #12740

Merged
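Before the per-file diffs, a short note on the shared pattern: in each reader benchmark the data-type-sweep function gains a cudf::io::io_type template parameter (and the existing IO parameter is renamed IOType), and the benchmark is re-registered with a new type axis named "io" whose only value is DEVICE_BUFFER, replacing the previous FILEPATH/HOST_BUFFER defaults. The snippet below is a minimal, self-contained sketch of that nvbench pattern, not code from the PR: the enums, names, and empty timed region are illustrative, and a benchmark runner (for example nvbench's NVBENCH_MAIN) is assumed to be provided elsewhere.

#include <nvbench/nvbench.cuh>
#include <cstdint>

// Illustrative stand-ins for the benchmarks' data_type and cudf::io::io_type enums.
enum class data_type : int32_t { INTEGRAL, FLOAT };
enum class io_type : int32_t { FILEPATH, HOST_BUFFER, DEVICE_BUFFER };

// A benchmark templated on two enums; nvbench passes the values as enum_type tags.
template <data_type DataType, io_type IOType>
void BM_demo_read(nvbench::state& state,
                  nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
  // The real benchmarks generate a table, write it through cuio_source_sink_pair(IOType),
  // and time reading it back; the timed region is left empty in this sketch.
  state.exec([](nvbench::launch&) { /* timed work */ });
}

using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL, data_type::FLOAT>;

// NVBENCH_TYPE_AXES builds the cross product of the type axes; a single-element io list
// keeps one configuration per data type, which is how the PR pins DEVICE_BUFFER.
NVBENCH_BENCH_TYPES(BM_demo_read,
                    NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
  .set_name("demo_read_decode")
  .set_type_axes_names({"data_type", "io"})
  .set_min_samples(4);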
21 changes: 12 additions & 9 deletions cpp/benchmarks/io/csv/csv_reader_input.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2022, NVIDIA CORPORATION.
+* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -64,19 +64,20 @@ void csv_read_common(DataType const& data_types,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

-template <data_type DataType>
-void BM_csv_read_input(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
+template <data_type DataType, cudf::io::io_type IOType>
+void BM_csv_read_input(nvbench::state& state,
+nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
-auto const source_type = io_type::FILEPATH;
+auto const source_type = IOType;

csv_read_common(d_type, source_type, state);
}

-template <cudf::io::io_type IO>
-void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
+template <cudf::io::io_type IOType>
+void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

@@ -86,7 +87,7 @@ void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type
static_cast<int32_t>(data_type::TIMESTAMP),
static_cast<int32_t>(data_type::DURATION),
static_cast<int32_t>(data_type::STRING)});
-auto const source_type = IO;
+auto const source_type = IOType;

csv_read_common(d_type, source_type, state);
}
@@ -101,9 +102,11 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
using io_list =
nvbench::enum_type_list<cudf::io::io_type::FILEPATH, cudf::io::io_type::HOST_BUFFER>;

-NVBENCH_BENCH_TYPES(BM_csv_read_input, NVBENCH_TYPE_AXES(d_type_list))
+NVBENCH_BENCH_TYPES(BM_csv_read_input,
+NVBENCH_TYPE_AXES(d_type_list,
+nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
.set_name("csv_read_data_type")
-.set_type_axes_names({"data_type"})
+.set_type_axes_names({"data_type", "io"})
.set_min_samples(4);

NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list))
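With DEVICE_BUFFER as the source, the encoded CSV bytes the reader consumes are already resident in GPU memory rather than on disk or in host memory. The sketch below shows, outside the benchmark harness, roughly what such a source looks like; it is not from the PR, cuio_source_sink_pair handles this internally, and the device-span overload of cudf::io::source_info used here is my assumption about the code path being exercised.

#include <cudf/io/csv.hpp>
#include <cudf/io/types.hpp>
#include <cudf/utilities/span.hpp>
#include <rmm/device_buffer.hpp>
#include <cstddef>

// Reads a CSV whose encoded bytes already live in device memory (hypothetical helper).
cudf::io::table_with_metadata read_csv_from_device(rmm::device_buffer const& encoded)
{
  // View the device allocation as a byte span (assumed source_info overload).
  auto const d_bytes = cudf::device_span<std::byte const>{
    static_cast<std::byte const*>(encoded.data()), encoded.size()};
  // Point the reader at the device-resident bytes; no host round trip of the encoded file.
  auto const opts =
    cudf::io::csv_reader_options::builder(cudf::io::source_info{d_bytes}).build();
  return cudf::io::read_csv(opts);
}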
19 changes: 11 additions & 8 deletions cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -57,8 +57,9 @@ void orc_read_common(cudf::io::orc_writer_options const& opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

-template <data_type DataType>
-void BM_orc_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
+template <data_type DataType, cudf::io::io_type IOType>
+void BM_orc_read_data(nvbench::state& state,
+nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

@@ -72,17 +73,17 @@ void BM_orc_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_ty
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

-cuio_source_sink_pair source_sink(io_type::HOST_BUFFER);
+cuio_source_sink_pair source_sink(IOType);
cudf::io::orc_writer_options opts =
cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view);

orc_read_common(opts, source_sink, state);
}

-template <cudf::io::io_type IO, cudf::io::compression_type Compression>
+template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
void BM_orc_read_io_compression(
nvbench::state& state,
-nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<Compression>>)
+nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
{
cudf::rmm_pool_raii rmm_pool;

@@ -103,7 +104,7 @@ void BM_orc_read_io_compression(
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

-cuio_source_sink_pair source_sink(IO);
+cuio_source_sink_pair source_sink(IOType);
cudf::io::orc_writer_options opts =
cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view)
.compression(Compression);
@@ -126,9 +127,11 @@ using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

-NVBENCH_BENCH_TYPES(BM_orc_read_data, NVBENCH_TYPE_AXES(d_type_list))
+NVBENCH_BENCH_TYPES(BM_orc_read_data,
+NVBENCH_TYPE_AXES(d_type_list,
+nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
.set_name("orc_read_decode")
-.set_type_axes_names({"data_type"})
+.set_type_axes_names({"data_type", "io"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});
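A detail the diff relies on but does not show: nvbench renders enum-valued type axes through NVBENCH_DECLARE_ENUM_TYPE_STRINGS, which is what lets the new "io" axis report DEVICE_BUFFER by name instead of an integer. As far as I can tell the cudf benchmarks already declare these strings for cudf::io::io_type in a shared header, so the diff only needs to name the axis; the snippet below illustrates the mechanism with assumed labels and is not the project's actual declaration.

#include <nvbench/nvbench.cuh>
#include <cudf/io/types.hpp>
#include <string>

// Maps each cudf::io::io_type value to the label shown in benchmark output; the second
// callable supplies an optional long-form description (left empty here).
NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
  cudf::io::io_type,
  [](cudf::io::io_type io) {
    switch (io) {
      case cudf::io::io_type::FILEPATH: return std::string{"FILEPATH"};
      case cudf::io::io_type::HOST_BUFFER: return std::string{"HOST_BUFFER"};
      case cudf::io::io_type::DEVICE_BUFFER: return std::string{"DEVICE_BUFFER"};
      default: return std::string{"UNKNOWN"};
    }
  },
  [](cudf::io::io_type) { return std::string{}; })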
21 changes: 12 additions & 9 deletions cpp/benchmarks/io/parquet/parquet_reader_input.cpp
@@ -57,35 +57,36 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

-template <data_type DataType>
-void BM_parquet_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
+template <data_type DataType, cudf::io::io_type IOType>
+void BM_parquet_read_data(
+nvbench::state& state,
+nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = cudf::io::compression_type::SNAPPY;
-auto const source_type = io_type::FILEPATH;

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
table_size_bytes{data_size},
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

-cuio_source_sink_pair source_sink(source_type);
+cuio_source_sink_pair source_sink(IOType);
cudf::io::parquet_writer_options write_opts =
cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
.compression(compression);

parquet_read_common(write_opts, source_sink, state);
}

-template <cudf::io::io_type IO, cudf::io::compression_type Compression>
+template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
void BM_parquet_read_io_compression(
nvbench::state& state,
-nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<Compression>>)
+nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
{
cudf::rmm_pool_raii rmm_pool;

@@ -101,7 +102,7 @@ void BM_parquet_read_io_compression(
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = Compression;
-auto const source_type = IO;
+auto const source_type = IOType;

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
@@ -133,9 +134,11 @@ using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

-NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list))
+NVBENCH_BENCH_TYPES(BM_parquet_read_data,
+NVBENCH_TYPE_AXES(d_type_list,
+nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
.set_name("parquet_read_decode")
-.set_type_axes_names({"data_type"})
+.set_type_axes_names({"data_type", "io"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});
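For comparison, a hypothetical alternative (not part of the change): the same templated function could be registered over the io_list this file already defines, sweeping every source type in the decode benchmark. The PR instead pins the decode benchmarks to DEVICE_BUFFER and leaves the io sweep to the *_read_io_compression benchmarks, presumably to keep the data_type configuration matrix small.

// Hypothetical registration, reusing BM_parquet_read_data, d_type_list, and io_list from
// this file; the benchmark name below is made up.
NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list, io_list))
  .set_name("parquet_read_decode_all_io")
  .set_type_axes_names({"data_type", "io"})
  .set_min_samples(4)
  .add_int64_axis("cardinality", {0, 1000})
  .add_int64_axis("run_length", {1, 32});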