Skip to content

Commit

Permalink
Update default data source in cuio reader benchmarks (#12740)
Browse files · Browse the repository at this point in the history
Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)

URL: #12740
  • Loading branch information
PointKernel authored Feb 10, 2023
1 parent 0cab19a commit c931d5a
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 26 deletions.
21 changes: 12 additions & 9 deletions cpp/benchmarks/io/csv/csv_reader_input.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -64,19 +64,20 @@ void csv_read_common(DataType const& data_types,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

template <data_type DataType>
void BM_csv_read_input(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
template <data_type DataType, cudf::io::io_type IOType>
void BM_csv_read_input(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
auto const source_type = io_type::FILEPATH;
auto const source_type = IOType;

csv_read_common(d_type, source_type, state);
}

template <cudf::io::io_type IO>
void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
template <cudf::io::io_type IOType>
void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

Expand All @@ -86,7 +87,7 @@ void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type
static_cast<int32_t>(data_type::TIMESTAMP),
static_cast<int32_t>(data_type::DURATION),
static_cast<int32_t>(data_type::STRING)});
auto const source_type = IO;
auto const source_type = IOType;

csv_read_common(d_type, source_type, state);
}
Expand All @@ -101,9 +102,11 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
using io_list =
nvbench::enum_type_list<cudf::io::io_type::FILEPATH, cudf::io::io_type::HOST_BUFFER>;

NVBENCH_BENCH_TYPES(BM_csv_read_input, NVBENCH_TYPE_AXES(d_type_list))
NVBENCH_BENCH_TYPES(BM_csv_read_input,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
.set_name("csv_read_data_type")
.set_type_axes_names({"data_type"})
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4);

NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list))
Expand Down
19 changes: 11 additions & 8 deletions cpp/benchmarks/io/orc/orc_reader_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ void orc_read_common(cudf::io::orc_writer_options const& opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

template <data_type DataType>
void BM_orc_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
template <data_type DataType, cudf::io::io_type IOType>
void BM_orc_read_data(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

Expand All @@ -72,17 +73,17 @@ void BM_orc_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_ty
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

cuio_source_sink_pair source_sink(io_type::HOST_BUFFER);
cuio_source_sink_pair source_sink(IOType);
cudf::io::orc_writer_options opts =
cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view);

orc_read_common(opts, source_sink, state);
}

template <cudf::io::io_type IO, cudf::io::compression_type Compression>
template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
void BM_orc_read_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<Compression>>)
nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
{
cudf::rmm_pool_raii rmm_pool;

Expand All @@ -103,7 +104,7 @@ void BM_orc_read_io_compression(
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

cuio_source_sink_pair source_sink(IO);
cuio_source_sink_pair source_sink(IOType);
cudf::io::orc_writer_options opts =
cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view)
.compression(Compression);
Expand All @@ -126,9 +127,11 @@ using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_orc_read_data, NVBENCH_TYPE_AXES(d_type_list))
NVBENCH_BENCH_TYPES(BM_orc_read_data,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
.set_name("orc_read_decode")
.set_type_axes_names({"data_type"})
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});
Expand Down
21 changes: 12 additions & 9 deletions cpp/benchmarks/io/parquet/parquet_reader_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,35 +57,36 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

template <data_type DataType>
void BM_parquet_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
template <data_type DataType, cudf::io::io_type IOType>
void BM_parquet_read_data(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
cudf::rmm_pool_raii rmm_pool;

auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = cudf::io::compression_type::SNAPPY;
auto const source_type = io_type::FILEPATH;

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
table_size_bytes{data_size},
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

cuio_source_sink_pair source_sink(source_type);
cuio_source_sink_pair source_sink(IOType);
cudf::io::parquet_writer_options write_opts =
cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
.compression(compression);

parquet_read_common(write_opts, source_sink, state);
}

template <cudf::io::io_type IO, cudf::io::compression_type Compression>
template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
void BM_parquet_read_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<Compression>>)
nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
{
cudf::rmm_pool_raii rmm_pool;

Expand All @@ -101,7 +102,7 @@ void BM_parquet_read_io_compression(
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = Compression;
auto const source_type = IO;
auto const source_type = IOType;

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
Expand Down Expand Up @@ -133,9 +134,11 @@ using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list))
NVBENCH_BENCH_TYPES(BM_parquet_read_data,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
.set_name("parquet_read_decode")
.set_type_axes_names({"data_type"})
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});
Expand Down

0 comments on commit c931d5a

Please sign in to comment.