From e23b6115b8a86985153794f6563aed8a016878c6 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 21 May 2024 16:01:39 -0700 Subject: [PATCH 1/6] separate benchmark io_type from cudf::io --- cpp/benchmarks/io/csv/csv_reader_input.cpp | 14 +++++--------- cpp/benchmarks/io/csv/csv_writer.cpp | 6 ++---- cpp/benchmarks/io/cuio_common.cpp | 11 +++++------ cpp/benchmarks/io/cuio_common.hpp | 10 ++++++++-- cpp/benchmarks/io/json/json_reader_input.cpp | 12 +++++------- cpp/benchmarks/io/json/json_writer.cpp | 7 +++---- cpp/benchmarks/io/nvbench_helpers.hpp | 10 +++++----- cpp/benchmarks/io/orc/orc_reader_input.cpp | 16 +++++++--------- cpp/benchmarks/io/orc/orc_writer.cpp | 6 ++---- cpp/benchmarks/io/parquet/parquet_writer.cpp | 6 ++---- 10 files changed, 44 insertions(+), 54 deletions(-) diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp index 2ad3bc36f59..5860011f3e3 100644 --- a/cpp/benchmarks/io/csv/csv_reader_input.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp @@ -28,9 +28,7 @@ constexpr size_t data_size = 256 << 20; constexpr cudf::size_type num_cols = 64; template -void csv_read_common(DataType const& data_types, - cudf::io::io_type const& source_type, - nvbench::state& state) +void csv_read_common(DataType const& data_types, io_type const& source_type, nvbench::state& state) { auto const tbl = create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}); @@ -66,7 +64,7 @@ void csv_read_common(DataType const& data_types, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template +template void BM_csv_read_input(nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { @@ -76,7 +74,7 @@ void BM_csv_read_input(nvbench::state& state, csv_read_common(d_type, source_type, state); } -template +template void BM_csv_read_io(nvbench::state& state, nvbench::type_list>) { auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), @@ -97,12 +95,10 @@ using d_type_list = nvbench::enum_type_list; -using io_list = - nvbench::enum_type_list; +using io_list = nvbench::enum_type_list; NVBENCH_BENCH_TYPES(BM_csv_read_input, - NVBENCH_TYPE_AXES(d_type_list, - nvbench::enum_type_list)) + NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list)) .set_name("csv_read_data_type") .set_type_axes_names({"data_type", "io"}) .set_min_samples(4); diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp b/cpp/benchmarks/io/csv/csv_writer.cpp index 8ff07be1531..1a5c63cb1bf 100644 --- a/cpp/benchmarks/io/csv/csv_writer.cpp +++ b/cpp/benchmarks/io/csv/csv_writer.cpp @@ -28,7 +28,7 @@ constexpr size_t data_size = 256 << 20; constexpr cudf::size_type num_cols = 64; -template +template void BM_csv_write_dtype_io(nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { @@ -112,9 +112,7 @@ using d_type_list = nvbench::enum_type_list; -using io_list = nvbench::enum_type_list; +using io_list = nvbench::enum_type_list; NVBENCH_BENCH_TYPES(BM_csv_write_dtype_io, NVBENCH_TYPE_AXES(d_type_list, io_list)) .set_name("csv_write_dtype_io") diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 3a61e5f1e7b..380f15dc485 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -204,13 +204,12 @@ void try_drop_l3_cache() "Failed to execute the drop cache command"); } -cudf::io::io_type retrieve_io_type_enum(std::string_view io_string) +io_type retrieve_io_type_enum(std::string_view io_string) { - if (io_string == "FILEPATH") { return cudf::io::io_type::FILEPATH; } - if (io_string == "HOST_BUFFER") { return cudf::io::io_type::HOST_BUFFER; } - if (io_string == "DEVICE_BUFFER") { return cudf::io::io_type::DEVICE_BUFFER; } - if (io_string == "VOID") { return cudf::io::io_type::VOID; } - if (io_string == "USER_IMPLEMENTED") { return cudf::io::io_type::USER_IMPLEMENTED; } + if (io_string == "FILEPATH") { return io_type::FILEPATH; } + if (io_string == "HOST_BUFFER") { return io_type::HOST_BUFFER; } + if (io_string == "DEVICE_BUFFER") { return io_type::DEVICE_BUFFER; } + if (io_string == "VOID") { return io_type::VOID; } CUDF_FAIL("Unsupported io_type."); } diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 3d5be41e25f..3921af3340c 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -24,7 +24,13 @@ #include -using cudf::io::io_type; +// IO types supported in the benchmarks +enum class io_type { + FILEPATH, // Input/output are both files + HOST_BUFFER, // Input/output are both host buffers (pageable) + DEVICE_BUFFER, // Input is a device buffer, output is a host buffer (pageable) + VOID +}; std::string random_file_in_dir(std::string const& dir_path); @@ -140,7 +146,7 @@ void try_drop_l3_cache(); * * @return The io_type enum value */ -cudf::io::io_type retrieve_io_type_enum(std::string_view io_string); +io_type retrieve_io_type_enum(std::string_view io_string); /** * @brief Convert a string to the corresponding compression_type enum value. diff --git a/cpp/benchmarks/io/json/json_reader_input.cpp b/cpp/benchmarks/io/json/json_reader_input.cpp index aa73dacdbc5..77700d3b85a 100644 --- a/cpp/benchmarks/io/json/json_reader_input.cpp +++ b/cpp/benchmarks/io/json/json_reader_input.cpp @@ -70,7 +70,7 @@ cudf::size_type json_write_bm_data(cudf::io::sink_info sink, return view.num_rows(); } -template +template void BM_json_read_io(nvbench::state& state, nvbench::type_list>) { cuio_source_sink_pair source_sink(IO); @@ -87,7 +87,7 @@ void BM_json_read_io(nvbench::state& state, nvbench::type_list +template void BM_json_read_data_type( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { @@ -107,16 +107,14 @@ using d_type_list = nvbench::enum_type_list; -using io_list = nvbench::enum_type_list; +using io_list = + nvbench::enum_type_list; using compression_list = nvbench::enum_type_list; NVBENCH_BENCH_TYPES(BM_json_read_data_type, - NVBENCH_TYPE_AXES(d_type_list, - nvbench::enum_type_list)) + NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list)) .set_name("json_read_data_type") .set_type_axes_names({"data_type", "io"}) .set_min_samples(4); diff --git a/cpp/benchmarks/io/json/json_writer.cpp b/cpp/benchmarks/io/json/json_writer.cpp index ae6bb81ff93..47b567a3f93 100644 --- a/cpp/benchmarks/io/json/json_writer.cpp +++ b/cpp/benchmarks/io/json/json_writer.cpp @@ -52,7 +52,7 @@ void json_write_common(cudf::io::json_writer_options const& write_opts, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template +template void BM_json_write_io(nvbench::state& state, nvbench::type_list>) { auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), @@ -114,9 +114,8 @@ void BM_json_writer_options(nvbench::state& state) json_write_common(write_opts, source_sink, data_size, state); } -using io_list = nvbench::enum_type_list; +using io_list = + nvbench::enum_type_list; NVBENCH_BENCH_TYPES(BM_json_write_io, NVBENCH_TYPE_AXES(io_list)) .set_name("json_write_io") diff --git a/cpp/benchmarks/io/nvbench_helpers.hpp b/cpp/benchmarks/io/nvbench_helpers.hpp index 8b79912c7ee..35b6c87bb58 100644 --- a/cpp/benchmarks/io/nvbench_helpers.hpp +++ b/cpp/benchmarks/io/nvbench_helpers.hpp @@ -56,13 +56,13 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS( [](auto) { return std::string{}; }) NVBENCH_DECLARE_ENUM_TYPE_STRINGS( - cudf::io::io_type, + io_type, [](auto value) { switch (value) { - case cudf::io::io_type::FILEPATH: return "FILEPATH"; - case cudf::io::io_type::HOST_BUFFER: return "HOST_BUFFER"; - case cudf::io::io_type::DEVICE_BUFFER: return "DEVICE_BUFFER"; - case cudf::io::io_type::VOID: return "VOID"; + case io_type::FILEPATH: return "FILEPATH"; + case io_type::HOST_BUFFER: return "HOST_BUFFER"; + case io_type::DEVICE_BUFFER: return "DEVICE_BUFFER"; + case io_type::VOID: return "VOID"; default: return "Unknown"; } }, diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index b7c214a8374..cafd3cc5c39 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -87,7 +87,7 @@ void orc_read_common(cudf::size_type num_rows_to_read, } // namespace -template +template void BM_orc_read_data(nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { @@ -112,7 +112,7 @@ void BM_orc_read_data(nvbench::state& state, orc_read_common(num_rows_written, source_sink, state); } -template +template void orc_read_io_compression(nvbench::state& state) { auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), @@ -150,7 +150,7 @@ void orc_read_io_compression(nvbench::state& state) orc_read_common(num_rows_written, source_sink, state); } -template +template void BM_orc_read_io_compression( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) @@ -163,7 +163,7 @@ void BM_orc_chunked_read_io_compression(nvbench::state& state, nvbench::type_list>) { // Only run benchmark using HOST_BUFFER IO. - return orc_read_io_compression(state); + return orc_read_io_compression(state); } using d_type_list = nvbench::enum_type_list; -using io_list = nvbench::enum_type_list; +using io_list = + nvbench::enum_type_list; using compression_list = nvbench::enum_type_list; NVBENCH_BENCH_TYPES(BM_orc_read_data, - NVBENCH_TYPE_AXES(d_type_list, - nvbench::enum_type_list)) + NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list)) .set_name("orc_read_decode") .set_type_axes_names({"data_type", "io"}) .set_min_samples(4) diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index bb373297222..cc0d1d73f2b 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -82,7 +82,7 @@ void BM_orc_write_encode(nvbench::state& state, nvbench::type_list +template void BM_orc_write_io_compression( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) @@ -183,9 +183,7 @@ using d_type_list = nvbench::enum_type_list; -using io_list = nvbench::enum_type_list; +using io_list = nvbench::enum_type_list; using compression_list = nvbench::enum_type_list; diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp index 13b396ea267..92df4cb481c 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp @@ -82,7 +82,7 @@ void BM_parq_write_encode(nvbench::state& state, nvbench::type_list +template void BM_parq_write_io_compression( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) @@ -188,9 +188,7 @@ using d_type_list = nvbench::enum_type_list; -using io_list = nvbench::enum_type_list; +using io_list = nvbench::enum_type_list; using compression_list = nvbench::enum_type_list; From 6a926e5601b79e91d2332962fc536140f8aa24f4 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 21 May 2024 16:51:10 -0700 Subject: [PATCH 2/6] implement pinned source_sink --- cpp/benchmarks/io/cuio_common.cpp | 12 ++++++++++-- cpp/benchmarks/io/cuio_common.hpp | 4 +++- cpp/benchmarks/io/nvbench_helpers.hpp | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 380f15dc485..37ced8ea703 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -52,6 +52,11 @@ cudf::io::source_info cuio_source_sink_pair::make_source_info() switch (type) { case io_type::FILEPATH: return cudf::io::source_info(file_name); case io_type::HOST_BUFFER: return cudf::io::source_info(h_buffer.data(), h_buffer.size()); + case io_type::PINNED_BUFFER: { + pinned_buffer.resize(h_buffer.size()); + std::copy(h_buffer.begin(), h_buffer.end(), pinned_buffer.begin()); + return cudf::io::source_info(pinned_buffer.data(), pinned_buffer.size()); + } case io_type::DEVICE_BUFFER: { // TODO: make cuio_source_sink_pair stream-friendly and avoid implicit use of the default // stream @@ -71,7 +76,8 @@ cudf::io::sink_info cuio_source_sink_pair::make_sink_info() switch (type) { case io_type::VOID: return cudf::io::sink_info(void_sink.get()); case io_type::FILEPATH: return cudf::io::sink_info(file_name); - case io_type::HOST_BUFFER: [[fallthrough]]; + case io_type::HOST_BUFFER: + case io_type::PINNED_BUFFER: case io_type::DEVICE_BUFFER: return cudf::io::sink_info(&h_buffer); default: CUDF_FAIL("invalid output type"); } @@ -84,7 +90,8 @@ size_t cuio_source_sink_pair::size() case io_type::FILEPATH: return static_cast( std::ifstream(file_name, std::ifstream::ate | std::ifstream::binary).tellg()); - case io_type::HOST_BUFFER: [[fallthrough]]; + case io_type::HOST_BUFFER: + case io_type::PINNED_BUFFER: case io_type::DEVICE_BUFFER: return h_buffer.size(); default: CUDF_FAIL("invalid output type"); } @@ -208,6 +215,7 @@ io_type retrieve_io_type_enum(std::string_view io_string) { if (io_string == "FILEPATH") { return io_type::FILEPATH; } if (io_string == "HOST_BUFFER") { return io_type::HOST_BUFFER; } + if (io_string == "PINNED_BUFFER") { return io_type::PINNED_BUFFER; } if (io_string == "DEVICE_BUFFER") { return io_type::DEVICE_BUFFER; } if (io_string == "VOID") { return io_type::VOID; } CUDF_FAIL("Unsupported io_type."); diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 3921af3340c..f1b9c5be6a3 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -18,9 +18,9 @@ #include +#include #include #include -#include #include @@ -28,6 +28,7 @@ enum class io_type { FILEPATH, // Input/output are both files HOST_BUFFER, // Input/output are both host buffers (pageable) + PINNED_BUFFER, // Input is a pinned host buffer, output is a host buffer (pageable) DEVICE_BUFFER, // Input is a device buffer, output is a host buffer (pageable) VOID }; @@ -74,6 +75,7 @@ class cuio_source_sink_pair { io_type const type; std::vector h_buffer; + cudf::detail::pinned_host_vector pinned_buffer; rmm::device_uvector d_buffer; std::string const file_name; std::unique_ptr void_sink; diff --git a/cpp/benchmarks/io/nvbench_helpers.hpp b/cpp/benchmarks/io/nvbench_helpers.hpp index 35b6c87bb58..1e3ab2b7b4f 100644 --- a/cpp/benchmarks/io/nvbench_helpers.hpp +++ b/cpp/benchmarks/io/nvbench_helpers.hpp @@ -61,6 +61,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS( switch (value) { case io_type::FILEPATH: return "FILEPATH"; case io_type::HOST_BUFFER: return "HOST_BUFFER"; + case io_type::PINNED_BUFFER: return "PINNED_BUFFER"; case io_type::DEVICE_BUFFER: return "DEVICE_BUFFER"; case io_type::VOID: return "VOID"; default: return "Unknown"; From 286bc3e6dbabd2b376ba53c2c77cb92a27b32f6f Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 22 May 2024 15:08:55 -0700 Subject: [PATCH 3/6] style --- cpp/benchmarks/io/csv/csv_reader_input.cpp | 2 +- cpp/benchmarks/io/csv/csv_writer.cpp | 2 +- cpp/benchmarks/io/json/json_reader_input.cpp | 2 +- cpp/benchmarks/io/json/json_writer.cpp | 2 +- cpp/benchmarks/io/orc/orc_writer.cpp | 2 +- cpp/benchmarks/io/orc/orc_writer_chunks.cpp | 2 +- cpp/benchmarks/io/parquet/parquet_writer.cpp | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp index 5860011f3e3..a93bc05ac58 100644 --- a/cpp/benchmarks/io/csv/csv_reader_input.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp b/cpp/benchmarks/io/csv/csv_writer.cpp index 1a5c63cb1bf..7ba43850cf2 100644 --- a/cpp/benchmarks/io/csv/csv_writer.cpp +++ b/cpp/benchmarks/io/csv/csv_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/io/json/json_reader_input.cpp b/cpp/benchmarks/io/json/json_reader_input.cpp index 77700d3b85a..4366790f208 100644 --- a/cpp/benchmarks/io/json/json_reader_input.cpp +++ b/cpp/benchmarks/io/json/json_reader_input.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/io/json/json_writer.cpp b/cpp/benchmarks/io/json/json_writer.cpp index 47b567a3f93..444457bbf0d 100644 --- a/cpp/benchmarks/io/json/json_writer.cpp +++ b/cpp/benchmarks/io/json/json_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index cc0d1d73f2b..b795f3e3164 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp index dff88d7ab6c..81ecee22e1e 100644 --- a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp +++ b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp index 92df4cb481c..46d2927a92b 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From eb065e41793494b00c2efc32f663bcb80ccbb481 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 22 May 2024 16:02:52 -0700 Subject: [PATCH 4/6] move make_source_info out of read_func --- .../io/parquet/parquet_reader_multithread.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp index fbdcfb0ade9..78abb531cfc 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp @@ -75,7 +75,7 @@ std::tuple, size_t, size_t> write_file_data( size_t total_file_size = 0; for (size_t i = 0; i < num_files; ++i) { - cuio_source_sink_pair source_sink{cudf::io::io_type::HOST_BUFFER}; + cuio_source_sink_pair source_sink{io_type::HOST_BUFFER}; auto const tbl = create_random_table( cycle_dtypes(d_types, num_cols), @@ -111,6 +111,10 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state, cudf::detail::thread_pool threads(num_threads); auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types); + std::vector source_info_vector; + for (auto& source_sink : source_sink_vector) { + source_info_vector.push_back(source_sink.make_source_info()); + } auto mem_stats_logger = cudf::memory_stats_logger(); @@ -119,9 +123,8 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state, [&](nvbench::launch& launch, auto& timer) { auto read_func = [&](int index) { auto const stream = streams[index % num_threads]; - auto& source_sink = source_sink_vector[index]; cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(source_sink.make_source_info()); + cudf::io::parquet_reader_options::builder(source_info_vector[index]); cudf::io::read_parquet(read_opts, stream, rmm::mr::get_current_device_resource()); }; @@ -191,6 +194,10 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state, auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads); cudf::detail::thread_pool threads(num_threads); auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types); + std::vector source_info_vector; + for (auto& source_sink : source_sink_vector) { + source_info_vector.push_back(source_sink.make_source_info()); + } auto mem_stats_logger = cudf::memory_stats_logger(); @@ -200,9 +207,8 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state, [&](nvbench::launch& launch, auto& timer) { auto read_func = [&](int index) { auto const stream = streams[index % num_threads]; - auto& source_sink = source_sink_vector[index]; cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(source_sink.make_source_info()); + cudf::io::parquet_reader_options::builder(source_info_vector[index]); // divide chunk limits by number of threads so the number of chunks produced is the // same for all cases. this seems better than the alternative, which is to keep the // limits the same. if we do that, as the number of threads goes up, the number of From f1b63b268e038d5d8ddb03fe3b668532e9740b92 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 22 May 2024 16:04:01 -0700 Subject: [PATCH 5/6] style fix --- cpp/benchmarks/io/orc/orc_writer_chunks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp index 81ecee22e1e..dff88d7ab6c 100644 --- a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp +++ b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 5c7e4c2129ae06b55067cbeec79c0505e7bfcaf3 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 31 May 2024 14:52:07 -0700 Subject: [PATCH 6/6] transform --- .../io/parquet/parquet_reader_multithread.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp index 20148a0752d..a67d1932951 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp @@ -97,9 +97,10 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state, auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types); std::vector source_info_vector; - for (auto& source_sink : source_sink_vector) { - source_info_vector.push_back(source_sink.make_source_info()); - } + std::transform(source_sink_vector.begin(), + source_sink_vector.end(), + std::back_inserter(source_info_vector), + [](auto& source_sink) { return source_sink.make_source_info(); }); auto mem_stats_logger = cudf::memory_stats_logger(); @@ -178,9 +179,10 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state, cudf::detail::thread_pool threads(num_threads); auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types); std::vector source_info_vector; - for (auto& source_sink : source_sink_vector) { - source_info_vector.push_back(source_sink.make_source_info()); - } + std::transform(source_sink_vector.begin(), + source_sink_vector.end(), + std::back_inserter(source_info_vector), + [](auto& source_sink) { return source_sink.make_source_info(); }); auto mem_stats_logger = cudf::memory_stats_logger();