diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp
index 2ad3bc36f59..a93bc05ac58 100644
--- a/cpp/benchmarks/io/csv/csv_reader_input.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,9 +28,7 @@ constexpr size_t data_size = 256 << 20;
 constexpr cudf::size_type num_cols = 64;
 
 template <typename DataType>
-void csv_read_common(DataType const& data_types,
-                     cudf::io::io_type const& source_type,
-                     nvbench::state& state)
+void csv_read_common(DataType const& data_types, io_type const& source_type, nvbench::state& state)
 {
   auto const tbl =
     create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size});
@@ -66,7 +64,7 @@ void csv_read_common(DataType const& data_types,
   state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
 }
 
-template <data_type DataType, cudf::io::io_type IO>
+template <data_type DataType, io_type IO>
 void BM_csv_read_input(nvbench::state& state,
                        nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
 {
@@ -76,7 +74,7 @@ void BM_csv_read_input(nvbench::state& state,
   csv_read_common(d_type, source_type, state);
 }
 
-template <cudf::io::io_type IO>
+template <io_type IO>
 void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
 {
   auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
@@ -97,12 +95,10 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
                                             data_type::DURATION,
                                             data_type::STRING>;
 
-using io_list =
-  nvbench::enum_type_list<cudf::io::io_type::FILEPATH, cudf::io::io_type::HOST_BUFFER>;
+using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER>;
 
 NVBENCH_BENCH_TYPES(BM_csv_read_input,
-                    NVBENCH_TYPE_AXES(d_type_list,
-                                      nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
+                    NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
   .set_name("csv_read_data_type")
   .set_type_axes_names({"data_type", "io"})
   .set_min_samples(4);
diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp b/cpp/benchmarks/io/csv/csv_writer.cpp
index 8ff07be1531..7ba43850cf2 100644
--- a/cpp/benchmarks/io/csv/csv_writer.cpp
+++ b/cpp/benchmarks/io/csv/csv_writer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,7 +28,7 @@
 constexpr size_t data_size = 256 << 20;
 constexpr cudf::size_type num_cols = 64;
 
-template <data_type DataType, cudf::io::io_type IO>
+template <data_type DataType, io_type IO>
 void BM_csv_write_dtype_io(nvbench::state& state,
                            nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
 {
@@ -112,9 +112,7 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
                                             data_type::DURATION,
                                             data_type::STRING>;
 
-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-                                        cudf::io::io_type::HOST_BUFFER,
-                                        cudf::io::io_type::VOID>;
+using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::VOID>;
 
 NVBENCH_BENCH_TYPES(BM_csv_write_dtype_io, NVBENCH_TYPE_AXES(d_type_list, io_list))
   .set_name("csv_write_dtype_io")
diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp
index 3a61e5f1e7b..37ced8ea703 100644
--- a/cpp/benchmarks/io/cuio_common.cpp
+++ b/cpp/benchmarks/io/cuio_common.cpp
@@ -52,6 +52,11 @@ cudf::io::source_info cuio_source_sink_pair::make_source_info()
   switch (type) {
     case io_type::FILEPATH: return cudf::io::source_info(file_name);
     case io_type::HOST_BUFFER: return cudf::io::source_info(h_buffer.data(), h_buffer.size());
+    case io_type::PINNED_BUFFER: {
+      pinned_buffer.resize(h_buffer.size());
+      std::copy(h_buffer.begin(), h_buffer.end(), pinned_buffer.begin());
+      return cudf::io::source_info(pinned_buffer.data(), pinned_buffer.size());
+    }
     case io_type::DEVICE_BUFFER: {
       // TODO: make cuio_source_sink_pair stream-friendly and avoid implicit use of the default
       // stream
@@ -71,7 +76,8 @@ cudf::io::sink_info cuio_source_sink_pair::make_sink_info()
   switch (type) {
     case io_type::VOID: return cudf::io::sink_info(void_sink.get());
     case io_type::FILEPATH: return cudf::io::sink_info(file_name);
-    case io_type::HOST_BUFFER: [[fallthrough]];
+    case io_type::HOST_BUFFER:
+    case io_type::PINNED_BUFFER:
     case io_type::DEVICE_BUFFER: return cudf::io::sink_info(&h_buffer);
     default: CUDF_FAIL("invalid output type");
   }
@@ -84,7 +90,8 @@ size_t cuio_source_sink_pair::size()
     case io_type::FILEPATH:
       return static_cast<size_t>(
         std::ifstream(file_name, std::ifstream::ate | std::ifstream::binary).tellg());
-    case io_type::HOST_BUFFER: [[fallthrough]];
+    case io_type::HOST_BUFFER:
+    case io_type::PINNED_BUFFER:
     case io_type::DEVICE_BUFFER: return h_buffer.size();
     default: CUDF_FAIL("invalid output type");
   }
@@ -204,13 +211,13 @@ void try_drop_l3_cache()
              "Failed to execute the drop cache command");
 }
 
-cudf::io::io_type retrieve_io_type_enum(std::string_view io_string)
+io_type retrieve_io_type_enum(std::string_view io_string)
 {
-  if (io_string == "FILEPATH") { return cudf::io::io_type::FILEPATH; }
-  if (io_string == "HOST_BUFFER") { return cudf::io::io_type::HOST_BUFFER; }
-  if (io_string == "DEVICE_BUFFER") { return cudf::io::io_type::DEVICE_BUFFER; }
-  if (io_string == "VOID") { return cudf::io::io_type::VOID; }
-  if (io_string == "USER_IMPLEMENTED") { return cudf::io::io_type::USER_IMPLEMENTED; }
+  if (io_string == "FILEPATH") { return io_type::FILEPATH; }
+  if (io_string == "HOST_BUFFER") { return io_type::HOST_BUFFER; }
+  if (io_string == "PINNED_BUFFER") { return io_type::PINNED_BUFFER; }
+  if (io_string == "DEVICE_BUFFER") { return io_type::DEVICE_BUFFER; }
+  if (io_string == "VOID") { return io_type::VOID; }
   CUDF_FAIL("Unsupported io_type.");
 }
 
diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp
index 6e0b32219ce..d4f39a5f243 100644
--- a/cpp/benchmarks/io/cuio_common.hpp
+++ b/cpp/benchmarks/io/cuio_common.hpp
@@ -18,13 +18,20 @@
 
 #include <benchmarks/synchronization/synchronization.hpp>
 
+#include <cudf/detail/utilities/pinned_host_vector.hpp>
 #include <cudf/io/data_sink.hpp>
 #include <cudf/io/datasource.hpp>
-#include <cudf/io/types.hpp>
 
 #include <rmm/device_uvector.hpp>
 
-using cudf::io::io_type;
+// IO types supported in the benchmarks
+enum class io_type {
+  FILEPATH,       // Input/output are both files
+  HOST_BUFFER,    // Input/output are both host buffers (pageable)
+  PINNED_BUFFER,  // Input is a pinned host buffer, output is a host buffer (pageable)
+  DEVICE_BUFFER,  // Input is a device buffer, output is a host buffer (pageable)
+  VOID
+};
 
 std::string random_file_in_dir(std::string const& dir_path);
 
@@ -72,6 +79,7 @@ class cuio_source_sink_pair {
   io_type const type;
 
   std::vector<char> h_buffer;
+  cudf::detail::pinned_host_vector<char> pinned_buffer;
   rmm::device_uvector<std::byte> d_buffer;
   std::string const file_name;
   std::unique_ptr<cudf::io::data_sink> void_sink;
@@ -144,7 +152,7 @@ void try_drop_l3_cache();
  *
  * @return The io_type enum value
  */
-cudf::io::io_type retrieve_io_type_enum(std::string_view io_string);
+io_type retrieve_io_type_enum(std::string_view io_string);
 
 /**
  * @brief Convert a string to the corresponding compression_type enum value.
diff --git a/cpp/benchmarks/io/json/json_reader_input.cpp b/cpp/benchmarks/io/json/json_reader_input.cpp
index aa73dacdbc5..4366790f208 100644
--- a/cpp/benchmarks/io/json/json_reader_input.cpp
+++ b/cpp/benchmarks/io/json/json_reader_input.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -70,7 +70,7 @@ cudf::size_type json_write_bm_data(cudf::io::sink_info sink,
   return view.num_rows();
 }
 
-template <cudf::io::io_type IO>
+template <io_type IO>
 void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
 {
   cuio_source_sink_pair source_sink(IO);
@@ -87,7 +87,7 @@ void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_ty
   json_read_common(source_sink, num_rows, state);
 }
 
-template <data_type DataType, cudf::io::io_type IO>
+template <data_type DataType, io_type IO>
 void BM_json_read_data_type(
   nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
 {
@@ -107,16 +107,14 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
                                             data_type::LIST,
                                             data_type::STRUCT>;
 
-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-                                        cudf::io::io_type::HOST_BUFFER,
-                                        cudf::io::io_type::DEVICE_BUFFER>;
+using io_list =
+  nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;
 
 using compression_list =
   nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;
 
 NVBENCH_BENCH_TYPES(BM_json_read_data_type,
-                    NVBENCH_TYPE_AXES(d_type_list,
-                                      nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
+                    NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
   .set_name("json_read_data_type")
   .set_type_axes_names({"data_type", "io"})
   .set_min_samples(4);
diff --git a/cpp/benchmarks/io/json/json_writer.cpp b/cpp/benchmarks/io/json/json_writer.cpp
index ae6bb81ff93..444457bbf0d 100644
--- a/cpp/benchmarks/io/json/json_writer.cpp
+++ b/cpp/benchmarks/io/json/json_writer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -52,7 +52,7 @@ void json_write_common(cudf::io::json_writer_options const& write_opts,
   state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
 }
 
-template <cudf::io::io_type IO>
+template <io_type IO>
 void BM_json_write_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
 {
   auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
@@ -114,9 +114,8 @@ void BM_json_writer_options(nvbench::state& state)
   json_write_common(write_opts, source_sink, data_size, state);
 }
 
-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-                                        cudf::io::io_type::HOST_BUFFER,
-                                        cudf::io::io_type::DEVICE_BUFFER>;
+using io_list =
+  nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;
 
 NVBENCH_BENCH_TYPES(BM_json_write_io, NVBENCH_TYPE_AXES(io_list))
   .set_name("json_write_io")
diff --git a/cpp/benchmarks/io/nvbench_helpers.hpp b/cpp/benchmarks/io/nvbench_helpers.hpp
index 8b79912c7ee..1e3ab2b7b4f 100644
--- a/cpp/benchmarks/io/nvbench_helpers.hpp
+++ b/cpp/benchmarks/io/nvbench_helpers.hpp
@@ -56,13 +56,14 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
   [](auto) { return std::string{}; })
 
 NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
-  cudf::io::io_type,
+  io_type,
   [](auto value) {
     switch (value) {
-      case cudf::io::io_type::FILEPATH: return "FILEPATH";
-      case cudf::io::io_type::HOST_BUFFER: return "HOST_BUFFER";
-      case cudf::io::io_type::DEVICE_BUFFER: return "DEVICE_BUFFER";
-      case cudf::io::io_type::VOID: return "VOID";
+      case io_type::FILEPATH: return "FILEPATH";
+      case io_type::HOST_BUFFER: return "HOST_BUFFER";
+      case io_type::PINNED_BUFFER: return "PINNED_BUFFER";
+      case io_type::DEVICE_BUFFER: return "DEVICE_BUFFER";
+      case io_type::VOID: return "VOID";
       default: return "Unknown";
     }
   },
diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index b7c214a8374..cafd3cc5c39 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -87,7 +87,7 @@ void orc_read_common(cudf::size_type num_rows_to_read,
 
 }  // namespace
 
-template <data_type DataType, cudf::io::io_type IO>
+template <data_type DataType, io_type IO>
 void BM_orc_read_data(nvbench::state& state,
                       nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
 {
@@ -112,7 +112,7 @@ void BM_orc_read_data(nvbench::state& state,
   orc_read_common(num_rows_written, source_sink, state);
 }
 
-template <cudf::io::io_type IO, cudf::io::compression_type compression>
+template <io_type IO, cudf::io::compression_type compression>
 void orc_read_io_compression(nvbench::state& state)
 {
   auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL_SIGNED),
@@ -150,7 +150,7 @@ void orc_read_io_compression(nvbench::state& state)
   orc_read_common(num_rows_written, source_sink, state);
 }
 
-template <cudf::io::io_type IO, cudf::io::compression_type compression>
+template <io_type IO, cudf::io::compression_type compression>
 void BM_orc_read_io_compression(
   nvbench::state& state,
   nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<compression>>)
@@ -163,7 +163,7 @@ void BM_orc_chunked_read_io_compression(nvbench::state& state,
                                         nvbench::type_list<nvbench::enum_type<compression>>)
 {
   // Only run benchmark using HOST_BUFFER IO.
-  return orc_read_io_compression<cudf::io::io_type::HOST_BUFFER, compression>(state);
+  return orc_read_io_compression<io_type::HOST_BUFFER, compression>(state);
 }
 
 using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
@@ -174,16 +174,14 @@
                                             data_type::LIST,
                                             data_type::STRUCT>;
 
-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-                                        cudf::io::io_type::HOST_BUFFER,
-                                        cudf::io::io_type::DEVICE_BUFFER>;
+using io_list =
+  nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;
 
 using compression_list =
   nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;
 
 NVBENCH_BENCH_TYPES(BM_orc_read_data,
-                    NVBENCH_TYPE_AXES(d_type_list,
-                                      nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
+                    NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
   .set_name("orc_read_decode")
   .set_type_axes_names({"data_type", "io"})
   .set_min_samples(4)
diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp
index bb373297222..b795f3e3164 100644
--- a/cpp/benchmarks/io/orc/orc_writer.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -82,7 +82,7 @@ void BM_orc_write_encode(nvbench::state& state, nvbench::type_list<nvbench::enu
   state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
 }
 
-template <cudf::io::io_type IO, cudf::io::compression_type compression>
+template <io_type IO, cudf::io::compression_type compression>
 void BM_orc_write_io_compression(
   nvbench::state& state,
   nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<compression>>)
@@ -183,9 +183,7 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
                                             data_type::LIST,
                                             data_type::STRUCT>;
 
-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-                                        cudf::io::io_type::HOST_BUFFER,
-                                        cudf::io::io_type::VOID>;
+using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::VOID>;
 
 using compression_list =
   nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
index bd80c4e0e88..a67d1932951 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
@@ -62,7 +62,7 @@ std::tuple<std::vector<cuio_source_sink_pair>, size_t, size_t> write_file_data(
   size_t total_file_size = 0;
 
   for (size_t i = 0; i < num_files; ++i) {
-    cuio_source_sink_pair source_sink{cudf::io::io_type::HOST_BUFFER};
+    cuio_source_sink_pair source_sink{io_type::HOST_BUFFER};
 
     auto const tbl = create_random_table(
       cycle_dtypes(d_types, num_cols),
@@ -96,6 +96,11 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state,
   auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
   cudf::detail::thread_pool threads(num_threads);
   auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types);
+  std::vector<cudf::io::source_info> source_info_vector;
+  std::transform(source_sink_vector.begin(),
+                 source_sink_vector.end(),
+                 std::back_inserter(source_info_vector),
+                 [](auto& source_sink) { return source_sink.make_source_info(); });
 
   auto mem_stats_logger = cudf::memory_stats_logger();
 
@@ -104,9 +109,8 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state,
     [&](nvbench::launch& launch, auto& timer) {
       auto read_func = [&](int index) {
        auto const stream = streams[index % num_threads];
-        auto& source_sink = source_sink_vector[index];
         cudf::io::parquet_reader_options read_opts =
-          cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
+          cudf::io::parquet_reader_options::builder(source_info_vector[index]);
         cudf::io::read_parquet(read_opts, stream, rmm::mr::get_current_device_resource());
       };
 
@@ -174,6 +178,11 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state,
   auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
   cudf::detail::thread_pool threads(num_threads);
   auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types);
+  std::vector<cudf::io::source_info> source_info_vector;
+  std::transform(source_sink_vector.begin(),
+                 source_sink_vector.end(),
+                 std::back_inserter(source_info_vector),
+                 [](auto& source_sink) { return source_sink.make_source_info(); });
 
   auto mem_stats_logger = cudf::memory_stats_logger();
 
@@ -183,9 +192,8 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state,
     [&](nvbench::launch& launch, auto& timer) {
      auto read_func = [&](int index) {
        auto const stream = streams[index % num_threads];
-        auto& source_sink = source_sink_vector[index];
        cudf::io::parquet_reader_options read_opts =
-          cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
+          cudf::io::parquet_reader_options::builder(source_info_vector[index]);
        // divide chunk limits by number of threads so the number of chunks produced is the
        // same for all cases. this seems better than the alternative, which is to keep the
        // limits the same. if we do that, as the number of threads goes up, the number of
diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp
index 13b396ea267..46d2927a92b 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -82,7 +82,7 @@ void BM_parq_write_encode(nvbench::state& state, nvbench::type_list<nvbench::en
   state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
 }
 
-template <cudf::io::io_type IO, cudf::io::compression_type compression>
+template <io_type IO, cudf::io::compression_type compression>
 void BM_parq_write_io_compression(
   nvbench::state& state,
   nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<compression>>)
@@ -188,9 +188,7 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
                                             data_type::LIST,
                                             data_type::STRUCT>;
 
-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-                                        cudf::io::io_type::HOST_BUFFER,
-                                        cudf::io::io_type::VOID>;
+using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::VOID>;
 
 using compression_list =
   nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;
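
Note (editorial sketch, not part of the patch): the new io_type::PINNED_BUFFER path in cuio_source_sink_pair::make_source_info() stages the pageable h_buffer into page-locked host memory (cudf::detail::pinned_host_vector) before handing the pointer/size pair to the reader, so the benchmark measures reads whose host-to-device copies start from pinned memory. Below is a minimal standalone illustration of that staging step, using plain CUDA runtime calls instead of pinned_host_vector; all names in it are illustrative and not taken from the patch.

#include <cuda_runtime.h>

#include <algorithm>
#include <cstdlib>
#include <vector>

int main()
{
  // Stand-in for cuio_source_sink_pair::h_buffer (pageable host memory).
  std::vector<char> pageable(1 << 20, 'x');

  // Allocate page-locked (pinned) host memory of the same size.
  void* pinned_raw = nullptr;
  if (cudaMallocHost(&pinned_raw, pageable.size()) != cudaSuccess) { return EXIT_FAILURE; }
  char* pinned = static_cast<char*>(pinned_raw);

  // Stage the pageable bytes into the pinned buffer, mirroring what
  // make_source_info() does for io_type::PINNED_BUFFER before it constructs
  // cudf::io::source_info(pinned_buffer.data(), pinned_buffer.size()).
  std::copy(pageable.begin(), pageable.end(), pinned);

  // ...the (pinned, size) pair would then back the reader's source, and
  // host-to-device copies from it can proceed without an extra staging copy...

  cudaFreeHost(pinned);
  return EXIT_SUCCESS;
}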