From 37b0a41c87e5a1d64265b9ebc561827bcbc580ac Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 8 Feb 2023 18:03:32 -0500 Subject: [PATCH 1/3] Change default data source type in orc reader benchmark --- cpp/benchmarks/io/orc/orc_reader_input.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index 3f8c096140e..76054e825b9 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -72,7 +72,7 @@ void BM_orc_read_data(nvbench::state& state, nvbench::type_listview(); - cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); + cuio_source_sink_pair source_sink(io_type::FILEPATH); cudf::io::orc_writer_options opts = cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view); From 7a11dbf9b5549403c9364befacd5bfc1a4fd090c Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 9 Feb 2023 17:50:07 -0500 Subject: [PATCH 2/3] Set DEVICE_BUFFER as default data source --- cpp/benchmarks/io/csv/csv_reader_input.cpp | 15 +++++++++------ cpp/benchmarks/io/orc/orc_reader_input.cpp | 13 ++++++++----- .../io/parquet/parquet_reader_input.cpp | 14 ++++++++------ 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp index 27fea856332..835e86e172a 100644 --- a/cpp/benchmarks/io/csv/csv_reader_input.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,13 +64,14 @@ void csv_read_common(DataType const& data_types, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template -void BM_csv_read_input(nvbench::state& state, nvbench::type_list>) +template +void BM_csv_read_input(nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; auto const d_type = get_type_or_group(static_cast(DataType)); - auto const source_type = io_type::FILEPATH; + auto const source_type = IO; csv_read_common(d_type, source_type, state); } @@ -101,9 +102,11 @@ using d_type_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(BM_csv_read_input, NVBENCH_TYPE_AXES(d_type_list)) +NVBENCH_BENCH_TYPES(BM_csv_read_input, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) .set_name("csv_read_data_type") - .set_type_axes_names({"data_type"}) + .set_type_axes_names({"data_type", "io"}) .set_min_samples(4); NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list)) diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index 76054e825b9..3d36aee322a 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -57,8 +57,9 @@ void orc_read_common(cudf::io::orc_writer_options const& opts, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template -void BM_orc_read_data(nvbench::state& state, nvbench::type_list>) +template +void BM_orc_read_data(nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; @@ -72,7 +73,7 @@ void BM_orc_read_data(nvbench::state& state, nvbench::type_listview(); - cuio_source_sink_pair source_sink(io_type::FILEPATH); + cuio_source_sink_pair source_sink(IO); cudf::io::orc_writer_options opts = cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view); @@ -126,9 +127,11 @@ using io_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(BM_orc_read_data, NVBENCH_TYPE_AXES(d_type_list)) +NVBENCH_BENCH_TYPES(BM_orc_read_data, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) .set_name("orc_read_decode") - .set_type_axes_names({"data_type"}) + .set_type_axes_names({"data_type", "io"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 36a62903f31..3f3e932710d 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -57,8 +57,9 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template -void BM_parquet_read_data(nvbench::state& state, nvbench::type_list>) +template +void BM_parquet_read_data(nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; @@ -66,7 +67,6 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_listview(); - cuio_source_sink_pair source_sink(source_type); + cuio_source_sink_pair source_sink(IO); cudf::io::parquet_writer_options write_opts = cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) .compression(compression); @@ -133,9 +133,11 @@ using io_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list)) +NVBENCH_BENCH_TYPES(BM_parquet_read_data, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) .set_name("parquet_read_decode") - .set_type_axes_names({"data_type"}) + .set_type_axes_names({"data_type", "io"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); From a4030db221cfad253ecf2a89ce02a273413aa8ad Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 10 Feb 2023 09:33:30 -0500 Subject: [PATCH 3/3] Renaming: IOType instead of IO --- cpp/benchmarks/io/csv/csv_reader_input.cpp | 12 ++++++------ cpp/benchmarks/io/orc/orc_reader_input.cpp | 12 ++++++------ .../io/parquet/parquet_reader_input.cpp | 15 ++++++++------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp index 835e86e172a..a68f689e4db 100644 --- a/cpp/benchmarks/io/csv/csv_reader_input.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp @@ -64,20 +64,20 @@ void csv_read_common(DataType const& data_types, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template +template void BM_csv_read_input(nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; auto const d_type = get_type_or_group(static_cast(DataType)); - auto const source_type = IO; + auto const source_type = IOType; csv_read_common(d_type, source_type, state); } -template -void BM_csv_read_io(nvbench::state& state, nvbench::type_list>) +template +void BM_csv_read_io(nvbench::state& state, nvbench::type_list>) { cudf::rmm_pool_raii rmm_pool; @@ -87,7 +87,7 @@ void BM_csv_read_io(nvbench::state& state, nvbench::type_list(data_type::TIMESTAMP), static_cast(data_type::DURATION), static_cast(data_type::STRING)}); - auto const source_type = IO; + auto const source_type = IOType; csv_read_common(d_type, source_type, state); } diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index 3d36aee322a..a57a12debc6 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -57,9 +57,9 @@ void orc_read_common(cudf::io::orc_writer_options const& opts, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template +template void BM_orc_read_data(nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; @@ -73,17 +73,17 @@ void BM_orc_read_data(nvbench::state& state, data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); auto const view = tbl->view(); - cuio_source_sink_pair source_sink(IO); + cuio_source_sink_pair source_sink(IOType); cudf::io::orc_writer_options opts = cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view); orc_read_common(opts, source_sink, state); } -template +template void BM_orc_read_io_compression( nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; @@ -104,7 +104,7 @@ void BM_orc_read_io_compression( data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); auto const view = tbl->view(); - cuio_source_sink_pair source_sink(IO); + cuio_source_sink_pair source_sink(IOType); cudf::io::orc_writer_options opts = cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view) .compression(Compression); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 3f3e932710d..fba69cb2b0f 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -57,9 +57,10 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template -void BM_parquet_read_data(nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +template +void BM_parquet_read_data( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; @@ -74,7 +75,7 @@ void BM_parquet_read_data(nvbench::state& state, data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); auto const view = tbl->view(); - cuio_source_sink_pair source_sink(IO); + cuio_source_sink_pair source_sink(IOType); cudf::io::parquet_writer_options write_opts = cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) .compression(compression); @@ -82,10 +83,10 @@ void BM_parquet_read_data(nvbench::state& state, parquet_read_common(write_opts, source_sink, state); } -template +template void BM_parquet_read_io_compression( nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; @@ -101,7 +102,7 @@ void BM_parquet_read_io_compression( cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); auto const compression = Compression; - auto const source_type = IO; + auto const source_type = IOType; auto const tbl = create_random_table(cycle_dtypes(d_type, num_cols),