Add an option to run cuIO benchmarks with pinned buffers as input #15830

Merged
12 commits merged on Jun 3, 2024
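In plain cudf terms, the new option hands the reader host bytes that live in page-locked memory instead of a pageable std::vector. A minimal sketch of that idea, using the cudf::detail::pinned_host_vector helper this PR pulls in and the (char const*, size_t) source_info overload it relies on; this is an illustration, not code from the PR:

// Illustrative sketch (not part of this PR): read a CSV payload staged in
// pinned (page-locked) host memory, the same path PINNED_BUFFER exercises.
#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/io/csv.hpp>
#include <cudf/io/types.hpp>

#include <algorithm>
#include <string>

cudf::io::table_with_metadata read_csv_from_pinned(std::string const& csv_bytes)
{
  // Stage the encoded bytes in pinned memory so the reader's host-to-device
  // copies can use the faster page-locked path.
  cudf::detail::pinned_host_vector<char> pinned(csv_bytes.size());
  std::copy(csv_bytes.begin(), csv_bytes.end(), pinned.begin());

  auto const source = cudf::io::source_info(pinned.data(), pinned.size());
  auto const opts   = cudf::io::csv_reader_options::builder(source).build();
  return cudf::io::read_csv(opts);
}

The benchmark harness below wraps exactly this staging step inside cuio_source_sink_pair::make_source_info().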
16 changes: 6 additions & 10 deletions cpp/benchmarks/io/csv/csv_reader_input.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,9 +28,7 @@ constexpr size_t data_size = 256 << 20;
constexpr cudf::size_type num_cols = 64;

template <typename DataType>
-void csv_read_common(DataType const& data_types,
-cudf::io::io_type const& source_type,
-nvbench::state& state)
+void csv_read_common(DataType const& data_types, io_type const& source_type, nvbench::state& state)
{
auto const tbl =
create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size});
@@ -66,7 +64,7 @@ void csv_read_common(DataType const& data_types,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

-template <data_type DataType, cudf::io::io_type IOType>
+template <data_type DataType, io_type IOType>
void BM_csv_read_input(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
csv_read_common(d_type, source_type, state);
}

-template <cudf::io::io_type IOType>
+template <io_type IOType>
void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IOType>>)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
@@ -97,12 +95,10 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::DURATION,
data_type::STRING>;

-using io_list =
-nvbench::enum_type_list<cudf::io::io_type::FILEPATH, cudf::io::io_type::HOST_BUFFER>;
+using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER>;

NVBENCH_BENCH_TYPES(BM_csv_read_input,
-NVBENCH_TYPE_AXES(d_type_list,
-nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
+NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
.set_name("csv_read_data_type")
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4);
8 changes: 3 additions & 5 deletions cpp/benchmarks/io/csv/csv_writer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,7 +28,7 @@
constexpr size_t data_size = 256 << 20;
constexpr cudf::size_type num_cols = 64;

-template <data_type DataType, cudf::io::io_type IO>
+template <data_type DataType, io_type IO>
void BM_csv_write_dtype_io(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
{
@@ -112,9 +112,7 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::DURATION,
data_type::STRING>;

-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-cudf::io::io_type::HOST_BUFFER,
-cudf::io::io_type::VOID>;
+using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::VOID>;

NVBENCH_BENCH_TYPES(BM_csv_write_dtype_io, NVBENCH_TYPE_AXES(d_type_list, io_list))
.set_name("csv_write_dtype_io")
23 changes: 15 additions & 8 deletions cpp/benchmarks/io/cuio_common.cpp
@@ -52,6 +52,11 @@ cudf::io::source_info cuio_source_sink_pair::make_source_info()
switch (type) {
case io_type::FILEPATH: return cudf::io::source_info(file_name);
case io_type::HOST_BUFFER: return cudf::io::source_info(h_buffer.data(), h_buffer.size());
+case io_type::PINNED_BUFFER: {
+pinned_buffer.resize(h_buffer.size());
+std::copy(h_buffer.begin(), h_buffer.end(), pinned_buffer.begin());
+return cudf::io::source_info(pinned_buffer.data(), pinned_buffer.size());
+}
case io_type::DEVICE_BUFFER: {
// TODO: make cuio_source_sink_pair stream-friendly and avoid implicit use of the default
// stream
@@ -71,7 +76,8 @@ cudf::io::sink_info cuio_source_sink_pair::make_sink_info()
switch (type) {
case io_type::VOID: return cudf::io::sink_info(void_sink.get());
case io_type::FILEPATH: return cudf::io::sink_info(file_name);
-case io_type::HOST_BUFFER: [[fallthrough]];
+case io_type::HOST_BUFFER:
+case io_type::PINNED_BUFFER:
case io_type::DEVICE_BUFFER: return cudf::io::sink_info(&h_buffer);
default: CUDF_FAIL("invalid output type");
}
@@ -84,7 +90,8 @@ size_t cuio_source_sink_pair::size()
case io_type::FILEPATH:
return static_cast<size_t>(
std::ifstream(file_name, std::ifstream::ate | std::ifstream::binary).tellg());
-case io_type::HOST_BUFFER: [[fallthrough]];
+case io_type::HOST_BUFFER:
+case io_type::PINNED_BUFFER:
case io_type::DEVICE_BUFFER: return h_buffer.size();
default: CUDF_FAIL("invalid output type");
}
@@ -204,13 +211,13 @@ void try_drop_l3_cache()
"Failed to execute the drop cache command");
}

-cudf::io::io_type retrieve_io_type_enum(std::string_view io_string)
+io_type retrieve_io_type_enum(std::string_view io_string)
{
-if (io_string == "FILEPATH") { return cudf::io::io_type::FILEPATH; }
-if (io_string == "HOST_BUFFER") { return cudf::io::io_type::HOST_BUFFER; }
-if (io_string == "DEVICE_BUFFER") { return cudf::io::io_type::DEVICE_BUFFER; }
-if (io_string == "VOID") { return cudf::io::io_type::VOID; }
-if (io_string == "USER_IMPLEMENTED") { return cudf::io::io_type::USER_IMPLEMENTED; }
+if (io_string == "FILEPATH") { return io_type::FILEPATH; }
+if (io_string == "HOST_BUFFER") { return io_type::HOST_BUFFER; }
+if (io_string == "PINNED_BUFFER") { return io_type::PINNED_BUFFER; }
+if (io_string == "DEVICE_BUFFER") { return io_type::DEVICE_BUFFER; }
+if (io_string == "VOID") { return io_type::VOID; }
CUDF_FAIL("Unsupported io_type.");
}

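Benchmarks that pick the IO source at run time go through retrieve_io_type_enum() above. A rough sketch of that entry point for the new value, assuming an nvbench string axis named "io_type"; the axis name and the benchmark body are illustrative, not taken from this diff:

// Illustrative sketch (not from this PR): run-time selection of PINNED_BUFFER.
// Assumes the declarations from cuio_common.hpp (below) are in scope.
#include <nvbench/nvbench.cuh>

static void BM_example_read(nvbench::state& state)
{
  io_type const source_type = retrieve_io_type_enum(state.get_string("io_type"));
  cuio_source_sink_pair source_sink(source_type);
  // ... write encoded data through source_sink.make_sink_info(), then time the
  // read from source_sink.make_source_info(), as the existing benchmarks do.
}

NVBENCH_BENCH(BM_example_read)
  .set_name("example_read")
  .add_string_axis("io_type", {"FILEPATH", "HOST_BUFFER", "PINNED_BUFFER", "DEVICE_BUFFER"});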
14 changes: 11 additions & 3 deletions cpp/benchmarks/io/cuio_common.hpp
@@ -18,13 +18,20 @@

#include <cudf_test/file_utilities.hpp>

+#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/io/data_sink.hpp>
#include <cudf/io/datasource.hpp>
#include <cudf/io/types.hpp>

#include <rmm/device_uvector.hpp>

-using cudf::io::io_type;
+// IO types supported in the benchmarks
+enum class io_type {
+FILEPATH,  // Input/output are both files
+HOST_BUFFER,  // Input/output are both host buffers (pageable)
+PINNED_BUFFER,  // Input is a pinned host buffer, output is a host buffer (pageable)
+DEVICE_BUFFER,  // Input is a device buffer, output is a host buffer (pageable)
+VOID
+};

std::string random_file_in_dir(std::string const& dir_path);

@@ -72,6 +79,7 @@ class cuio_source_sink_pair {

io_type const type;
std::vector<char> h_buffer;
+cudf::detail::pinned_host_vector<char> pinned_buffer;
rmm::device_uvector<std::byte> d_buffer;
std::string const file_name;
std::unique_ptr<cudf::io::data_sink> void_sink;
@@ -144,7 +152,7 @@ void try_drop_l3_cache();
*
* @return The io_type enum value
*/
-cudf::io::io_type retrieve_io_type_enum(std::string_view io_string);
+io_type retrieve_io_type_enum(std::string_view io_string);

/**
* @brief Convert a string to the corresponding compression_type enum value.
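Putting the cuio_source_sink_pair pieces together, a benchmark body that opts into pinned input looks roughly like this; the CSV round trip and the tbl_view parameter are placeholders for illustration, not code from this PR:

// Illustrative sketch (not part of this PR): the writer still targets the
// pageable h_buffer sink; make_source_info() then stages those bytes into the
// pinned_buffer member, so the timed read consumes pinned host memory.
#include <cudf/io/csv.hpp>
#include <cudf/table/table_view.hpp>

void pinned_input_roundtrip(cudf::table_view const& tbl_view)
{
  cuio_source_sink_pair source_sink(io_type::PINNED_BUFFER);

  auto const write_opts =
    cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), tbl_view).build();
  cudf::io::write_csv(write_opts);

  auto const read_opts =
    cudf::io::csv_reader_options::builder(source_sink.make_source_info()).build();
  auto const result = cudf::io::read_csv(read_opts);  // the part a benchmark would time
}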
14 changes: 6 additions & 8 deletions cpp/benchmarks/io/json/json_reader_input.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -70,7 +70,7 @@ cudf::size_type json_write_bm_data(cudf::io::sink_info sink,
return view.num_rows();
}

-template <cudf::io::io_type IO>
+template <io_type IO>
void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
{
cuio_source_sink_pair source_sink(IO);
@@ -87,7 +87,7 @@ void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_typ
json_read_common(source_sink, num_rows, state);
}

-template <data_type DataType, cudf::io::io_type IO>
+template <data_type DataType, io_type IO>
void BM_json_read_data_type(
nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
{
@@ -107,16 +107,14 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::LIST,
data_type::STRUCT>;

-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-cudf::io::io_type::HOST_BUFFER,
-cudf::io::io_type::DEVICE_BUFFER>;
+using io_list =
+nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;

using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_json_read_data_type,
-NVBENCH_TYPE_AXES(d_type_list,
-nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
+NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
.set_name("json_read_data_type")
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4);
9 changes: 4 additions & 5 deletions cpp/benchmarks/io/json/json_writer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -52,7 +52,7 @@ void json_write_common(cudf::io::json_writer_options const& write_opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

-template <cudf::io::io_type IO>
+template <io_type IO>
void BM_json_write_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
@@ -114,9 +114,8 @@ void BM_json_writer_options(nvbench::state& state)
json_write_common(write_opts, source_sink, data_size, state);
}

-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-cudf::io::io_type::HOST_BUFFER,
-cudf::io::io_type::DEVICE_BUFFER>;
+using io_list =
+nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;

NVBENCH_BENCH_TYPES(BM_json_write_io, NVBENCH_TYPE_AXES(io_list))
.set_name("json_write_io")
11 changes: 6 additions & 5 deletions cpp/benchmarks/io/nvbench_helpers.hpp
@@ -56,13 +56,14 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
[](auto) { return std::string{}; })

NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
-cudf::io::io_type,
+io_type,
[](auto value) {
switch (value) {
-case cudf::io::io_type::FILEPATH: return "FILEPATH";
-case cudf::io::io_type::HOST_BUFFER: return "HOST_BUFFER";
-case cudf::io::io_type::DEVICE_BUFFER: return "DEVICE_BUFFER";
-case cudf::io::io_type::VOID: return "VOID";
+case io_type::FILEPATH: return "FILEPATH";
+case io_type::HOST_BUFFER: return "HOST_BUFFER";
+case io_type::PINNED_BUFFER: return "PINNED_BUFFER";
+case io_type::DEVICE_BUFFER: return "DEVICE_BUFFER";
+case io_type::VOID: return "VOID";
default: return "Unknown";
}
},
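Note that the io_list declarations in this diff only drop the cudf::io:: prefix; none of the compile-time axes shown here add the new value. A benchmark that wanted to sweep pinned input on its type axis could opt in roughly as follows, reusing BM_csv_read_io from above (illustrative only, not something this diff does):

// Illustrative sketch (not part of this PR): include PINNED_BUFFER in a type axis.
using io_list_with_pinned = nvbench::enum_type_list<io_type::FILEPATH,
                                                    io_type::HOST_BUFFER,
                                                    io_type::PINNED_BUFFER,
                                                    io_type::DEVICE_BUFFER>;

NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list_with_pinned))
  .set_name("csv_read_io")
  .set_type_axes_names({"io"})
  .set_min_samples(4);

Because the NVBENCH_DECLARE_ENUM_TYPE_STRINGS specialization above now covers PINNED_BUFFER, the extra axis value would render as "PINNED_BUFFER" in the benchmark output.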
16 changes: 7 additions & 9 deletions cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -87,7 +87,7 @@ void orc_read_common(cudf::size_type num_rows_to_read,

} // namespace

-template <data_type DataType, cudf::io::io_type IOType>
+template <data_type DataType, io_type IOType>
void BM_orc_read_data(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
@@ -112,7 +112,7 @@ void BM_orc_read_data(nvbench::state& state,
orc_read_common<false>(num_rows_written, source_sink, state);
}

-template <cudf::io::io_type IOType, cudf::io::compression_type Compression, bool chunked_read>
+template <io_type IOType, cudf::io::compression_type Compression, bool chunked_read>
void orc_read_io_compression(nvbench::state& state)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL_SIGNED),
@@ -150,7 +150,7 @@ void orc_read_io_compression(nvbench::state& state)
orc_read_common<chunked_read>(num_rows_written, source_sink, state);
}

-template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
+template <io_type IOType, cudf::io::compression_type Compression>
void BM_orc_read_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
@@ -163,7 +163,7 @@ void BM_orc_chunked_read_io_compression(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<Compression>>)
{
// Only run benchmark using HOST_BUFFER IO.
-return orc_read_io_compression<cudf::io::io_type::HOST_BUFFER, Compression, true>(state);
+return orc_read_io_compression<io_type::HOST_BUFFER, Compression, true>(state);
}

using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
@@ -174,16 +174,14 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
data_type::LIST,
data_type::STRUCT>;

-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-cudf::io::io_type::HOST_BUFFER,
-cudf::io::io_type::DEVICE_BUFFER>;
+using io_list =
+nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;

using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_orc_read_data,
-NVBENCH_TYPE_AXES(d_type_list,
-nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
+NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
.set_name("orc_read_decode")
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4)
8 changes: 3 additions & 5 deletions cpp/benchmarks/io/orc/orc_writer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -82,7 +82,7 @@ void BM_orc_write_encode(nvbench::state& state, nvbench::type_list<nvbench::enum
state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size");
}

-template <cudf::io::io_type IO, cudf::io::compression_type Compression>
+template <io_type IO, cudf::io::compression_type Compression>
void BM_orc_write_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<Compression>>)
@@ -183,9 +183,7 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
data_type::LIST,
data_type::STRUCT>;

-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-cudf::io::io_type::HOST_BUFFER,
-cudf::io::io_type::VOID>;
+using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::VOID>;

using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;
16 changes: 11 additions & 5 deletions cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
@@ -62,7 +62,7 @@ std::tuple<std::vector<cuio_source_sink_pair>, size_t, size_t> write_file_data(
size_t total_file_size = 0;

for (size_t i = 0; i < num_files; ++i) {
-cuio_source_sink_pair source_sink{cudf::io::io_type::HOST_BUFFER};
+cuio_source_sink_pair source_sink{io_type::HOST_BUFFER};

auto const tbl = create_random_table(
cycle_dtypes(d_types, num_cols),
@@ -96,6 +96,10 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state,
cudf::detail::thread_pool threads(num_threads);

auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types);
+std::vector<cudf::io::source_info> source_info_vector;
+for (auto& source_sink : source_sink_vector) {
+source_info_vector.push_back(source_sink.make_source_info());
+}

auto mem_stats_logger = cudf::memory_stats_logger();

[&](nvbench::launch& launch, auto& timer) {
auto read_func = [&](int index) {
auto const stream = streams[index % num_threads];
-auto& source_sink = source_sink_vector[index];
cudf::io::parquet_reader_options read_opts =
-cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
+cudf::io::parquet_reader_options::builder(source_info_vector[index]);
cudf::io::read_parquet(read_opts, stream, rmm::mr::get_current_device_resource());
};

@@ -174,6 +177,10 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state,
auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
cudf::detail::thread_pool threads(num_threads);
auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types);
+std::vector<cudf::io::source_info> source_info_vector;
+for (auto& source_sink : source_sink_vector) {
+source_info_vector.push_back(source_sink.make_source_info());
+}

auto mem_stats_logger = cudf::memory_stats_logger();

[&](nvbench::launch& launch, auto& timer) {
auto read_func = [&](int index) {
auto const stream = streams[index % num_threads];
-auto& source_sink = source_sink_vector[index];
cudf::io::parquet_reader_options read_opts =
-cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
+cudf::io::parquet_reader_options::builder(source_info_vector[index]);
// divide chunk limits by number of threads so the number of chunks produced is the
// same for all cases. this seems better than the alternative, which is to keep the
// limits the same. if we do that, as the number of threads goes up, the number of
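A side note on the two multithreaded Parquet hunks above: the source infos are now built once into source_info_vector before the timed region, and the per-thread read lambdas index into that vector instead of calling make_source_info() inside the loop, which keeps source construction out of the measured read time.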