Skip to content

Commit

Permalink
Update default data source in cuio reader benchmarks (#12740)
Browse files · Browse the repository at this point in the history
Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)

URL: #12740
  • Loading branch information
PointKernel authored Feb 10, 2023
1 parent 0cab19a commit c931d5a
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 26 deletions.
21 changes: 12 additions & 9 deletions cpp/benchmarks/io/csv/csv_reader_input.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -64,19 +64,20 @@ void csv_read_common(DataType const& data_types,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

template <data_type DataType>
void BM_csv_read_input(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
template <data_type DataType, cudf::io::io_type IOType>
void BM_csv_read_input(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
auto const source_type = io_type::FILEPATH;
auto const source_type = IOType;

csv_read_common(d_type, source_type, state);
}

template <cudf::io::io_type IO>
void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
template <cudf::io::io_type IOType>
void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

Expand All @@ -86,7 +87,7 @@ void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type
static_cast<int32_t>(data_type::TIMESTAMP),
static_cast<int32_t>(data_type::DURATION),
static_cast<int32_t>(data_type::STRING)});
auto const source_type = IO;
auto const source_type = IOType;

csv_read_common(d_type, source_type, state);
}
Expand All @@ -101,9 +102,11 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
using io_list =
nvbench::enum_type_list<cudf::io::io_type::FILEPATH, cudf::io::io_type::HOST_BUFFER>;

NVBENCH_BENCH_TYPES(BM_csv_read_input, NVBENCH_TYPE_AXES(d_type_list))
NVBENCH_BENCH_TYPES(BM_csv_read_input,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
.set_name("csv_read_data_type")
.set_type_axes_names({"data_type"})
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4);

NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list))
Expand Down
19 changes: 11 additions & 8 deletions cpp/benchmarks/io/orc/orc_reader_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ void orc_read_common(cudf::io::orc_writer_options const& opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

template <data_type DataType>
void BM_orc_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
template <data_type DataType, cudf::io::io_type IOType>
void BM_orc_read_data(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

Expand All @@ -72,17 +73,17 @@ void BM_orc_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_ty
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

cuio_source_sink_pair source_sink(io_type::HOST_BUFFER);
cuio_source_sink_pair source_sink(IOType);
cudf::io::orc_writer_options opts =
cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view);

orc_read_common(opts, source_sink, state);
}

template <cudf::io::io_type IO, cudf::io::compression_type Compression>
template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
void BM_orc_read_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<Compression>>)
nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
{
cudf::rmm_pool_raii rmm_pool;

Expand All @@ -103,7 +104,7 @@ void BM_orc_read_io_compression(
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

cuio_source_sink_pair source_sink(IO);
cuio_source_sink_pair source_sink(IOType);
cudf::io::orc_writer_options opts =
cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view)
.compression(Compression);
Expand All @@ -126,9 +127,11 @@ using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_orc_read_data, NVBENCH_TYPE_AXES(d_type_list))
NVBENCH_BENCH_TYPES(BM_orc_read_data,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
.set_name("orc_read_decode")
.set_type_axes_names({"data_type"})
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});
Expand Down
21 changes: 12 additions & 9 deletions cpp/benchmarks/io/parquet/parquet_reader_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,35 +57,36 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

template <data_type DataType>
void BM_parquet_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
template <data_type DataType, cudf::io::io_type IOType>
void BM_parquet_read_data(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = cudf::io::compression_type::SNAPPY;
auto const source_type = io_type::FILEPATH;

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
table_size_bytes{data_size},
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

cuio_source_sink_pair source_sink(source_type);
cuio_source_sink_pair source_sink(IOType);
cudf::io::parquet_writer_options write_opts =
cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
.compression(compression);

parquet_read_common(write_opts, source_sink, state);
}

template <cudf::io::io_type IO, cudf::io::compression_type Compression>
template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
void BM_parquet_read_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<Compression>>)
nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
{
cudf::rmm_pool_raii rmm_pool;

Expand All @@ -101,7 +102,7 @@ void BM_parquet_read_io_compression(
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = Compression;
auto const source_type = IO;
auto const source_type = IOType;

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
Expand Down Expand Up @@ -133,9 +134,11 @@ using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list))
NVBENCH_BENCH_TYPES(BM_parquet_read_data,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
.set_name("parquet_read_decode")
.set_type_axes_names({"data_type"})
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});
Expand Down

0 comments on commit c931d5a

Please sign in to comment.