Add an option to run cuIO benchmarks with pinned buffers as input #15830

Merged
12 commits merged on Jun 3, 2024
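In plain cudf terms, the new option hands the reader host bytes that live in page-locked memory instead of a pageable std::vector. A minimal sketch of that idea, using the cudf::detail::pinned_host_vector helper this PR pulls in and the (char const*, size_t) source_info overload it relies on; this is an illustration, not code from the PR:

// Illustrative sketch (not part of this PR): read a CSV payload staged in
// pinned (page-locked) host memory, the same path PINNED_BUFFER exercises.
#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/io/csv.hpp>
#include <cudf/io/types.hpp>

#include <algorithm>
#include <string>

cudf::io::table_with_metadata read_csv_from_pinned(std::string const& csv_bytes)
{
  // Stage the encoded bytes in pinned memory so the reader's host-to-device
  // copies can use the faster page-locked path.
  cudf::detail::pinned_host_vector<char> pinned(csv_bytes.size());
  std::copy(csv_bytes.begin(), csv_bytes.end(), pinned.begin());

  auto const source = cudf::io::source_info(pinned.data(), pinned.size());
  auto const opts   = cudf::io::csv_reader_options::builder(source).build();
  return cudf::io::read_csv(opts);
}

The benchmark harness below wraps exactly this staging step inside cuio_source_sink_pair::make_source_info().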
16 changes: 6 additions & 10 deletions cpp/benchmarks/io/csv/csv_reader_input.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,9 +28,7 @@ constexpr size_t data_size = 256 << 20;
constexpr cudf::size_type num_cols = 64;

template <typename DataType>
-void csv_read_common(DataType const& data_types,
-cudf::io::io_type const& source_type,
-nvbench::state& state)
+void csv_read_common(DataType const& data_types, io_type const& source_type, nvbench::state& state)
{
auto const tbl =
create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size});
@@ -66,7 +64,7 @@ void csv_read_common(DataType const& data_types,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

-template <data_type DataType, cudf::io::io_type IOType>
+template <data_type DataType, io_type IOType>
void BM_csv_read_input(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
csv_read_common(d_type, source_type, state);
}

-template <cudf::io::io_type IOType>
+template <io_type IOType>
void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IOType>>)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
@@ -97,12 +95,10 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::DURATION,
data_type::STRING>;

-using io_list =
-nvbench::enum_type_list<cudf::io::io_type::FILEPATH, cudf::io::io_type::HOST_BUFFER>;
+using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER>;

NVBENCH_BENCH_TYPES(BM_csv_read_input,
-NVBENCH_TYPE_AXES(d_type_list,
-nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
+NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
.set_name("csv_read_data_type")
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4);
8 changes: 3 additions & 5 deletions cpp/benchmarks/io/csv/csv_writer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,7 +28,7 @@
constexpr size_t data_size = 256 << 20;
constexpr cudf::size_type num_cols = 64;

-template <data_type DataType, cudf::io::io_type IO>
+template <data_type DataType, io_type IO>
void BM_csv_write_dtype_io(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
{
@@ -112,9 +112,7 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::DURATION,
data_type::STRING>;

-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-cudf::io::io_type::HOST_BUFFER,
-cudf::io::io_type::VOID>;
+using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::VOID>;

NVBENCH_BENCH_TYPES(BM_csv_write_dtype_io, NVBENCH_TYPE_AXES(d_type_list, io_list))
.set_name("csv_write_dtype_io")
23 changes: 15 additions & 8 deletions cpp/benchmarks/io/cuio_common.cpp
@@ -52,6 +52,11 @@ cudf::io::source_info cuio_source_sink_pair::make_source_info()
switch (type) {
case io_type::FILEPATH: return cudf::io::source_info(file_name);
case io_type::HOST_BUFFER: return cudf::io::source_info(h_buffer.data(), h_buffer.size());
+case io_type::PINNED_BUFFER: {
+pinned_buffer.resize(h_buffer.size());
+std::copy(h_buffer.begin(), h_buffer.end(), pinned_buffer.begin());
+return cudf::io::source_info(pinned_buffer.data(), pinned_buffer.size());
+}
case io_type::DEVICE_BUFFER: {
// TODO: make cuio_source_sink_pair stream-friendly and avoid implicit use of the default
// stream
@@ -71,7 +76,8 @@ cudf::io::sink_info cuio_source_sink_pair::make_sink_info()
switch (type) {
case io_type::VOID: return cudf::io::sink_info(void_sink.get());
case io_type::FILEPATH: return cudf::io::sink_info(file_name);
-case io_type::HOST_BUFFER: [[fallthrough]];
+case io_type::HOST_BUFFER:
+case io_type::PINNED_BUFFER:
case io_type::DEVICE_BUFFER: return cudf::io::sink_info(&h_buffer);
default: CUDF_FAIL("invalid output type");
}
@@ -84,7 +90,8 @@ size_t cuio_source_sink_pair::size()
case io_type::FILEPATH:
return static_cast<size_t>(
std::ifstream(file_name, std::ifstream::ate | std::ifstream::binary).tellg());
-case io_type::HOST_BUFFER: [[fallthrough]];
+case io_type::HOST_BUFFER:
+case io_type::PINNED_BUFFER:
case io_type::DEVICE_BUFFER: return h_buffer.size();
default: CUDF_FAIL("invalid output type");
}
@@ -204,13 +211,13 @@ void try_drop_l3_cache()
"Failed to execute the drop cache command");
}

-cudf::io::io_type retrieve_io_type_enum(std::string_view io_string)
+io_type retrieve_io_type_enum(std::string_view io_string)
{
-if (io_string == "FILEPATH") { return cudf::io::io_type::FILEPATH; }
-if (io_string == "HOST_BUFFER") { return cudf::io::io_type::HOST_BUFFER; }
-if (io_string == "DEVICE_BUFFER") { return cudf::io::io_type::DEVICE_BUFFER; }
-if (io_string == "VOID") { return cudf::io::io_type::VOID; }
-if (io_string == "USER_IMPLEMENTED") { return cudf::io::io_type::USER_IMPLEMENTED; }
+if (io_string == "FILEPATH") { return io_type::FILEPATH; }
+if (io_string == "HOST_BUFFER") { return io_type::HOST_BUFFER; }
+if (io_string == "PINNED_BUFFER") { return io_type::PINNED_BUFFER; }
+if (io_string == "DEVICE_BUFFER") { return io_type::DEVICE_BUFFER; }
+if (io_string == "VOID") { return io_type::VOID; }
CUDF_FAIL("Unsupported io_type.");
}

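Benchmarks that pick the IO source at run time go through retrieve_io_type_enum() above. A rough sketch of that entry point for the new value, assuming an nvbench string axis named "io_type"; the axis name and the benchmark body are illustrative, not taken from this diff:

// Illustrative sketch (not from this PR): run-time selection of PINNED_BUFFER.
// Assumes the declarations from cuio_common.hpp (below) are in scope.
#include <nvbench/nvbench.cuh>

static void BM_example_read(nvbench::state& state)
{
  io_type const source_type = retrieve_io_type_enum(state.get_string("io_type"));
  cuio_source_sink_pair source_sink(source_type);
  // ... write encoded data through source_sink.make_sink_info(), then time the
  // read from source_sink.make_source_info(), as the existing benchmarks do.
}

NVBENCH_BENCH(BM_example_read)
  .set_name("example_read")
  .add_string_axis("io_type", {"FILEPATH", "HOST_BUFFER", "PINNED_BUFFER", "DEVICE_BUFFER"});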
14 changes: 11 additions & 3 deletions cpp/benchmarks/io/cuio_common.hpp
@@ -18,13 +18,20 @@

#include <cudf_test/file_utilities.hpp>

+#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/io/data_sink.hpp>
#include <cudf/io/datasource.hpp>
#include <cudf/io/types.hpp>

#include <rmm/device_uvector.hpp>

-using cudf::io::io_type;
+// IO types supported in the benchmarks
+enum class io_type {
+FILEPATH,  // Input/output are both files
+HOST_BUFFER,  // Input/output are both host buffers (pageable)
+PINNED_BUFFER,  // Input is a pinned host buffer, output is a host buffer (pageable)
+DEVICE_BUFFER,  // Input is a device buffer, output is a host buffer (pageable)
+VOID
+};

std::string random_file_in_dir(std::string const& dir_path);

@@ -72,6 +79,7 @@ class cuio_source_sink_pair {

io_type const type;
std::vector<char> h_buffer;
+cudf::detail::pinned_host_vector<char> pinned_buffer;
rmm::device_uvector<std::byte> d_buffer;
std::string const file_name;
std::unique_ptr<cudf::io::data_sink> void_sink;
@@ -144,7 +152,7 @@ void try_drop_l3_cache();
*
* @return The io_type enum value
*/
-cudf::io::io_type retrieve_io_type_enum(std::string_view io_string);
+io_type retrieve_io_type_enum(std::string_view io_string);

/**
* @brief Convert a string to the corresponding compression_type enum value.
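Putting the cuio_source_sink_pair pieces together, a benchmark body that opts into pinned input looks roughly like this; the CSV round trip and the tbl_view parameter are placeholders for illustration, not code from this PR:

// Illustrative sketch (not part of this PR): the writer still targets the
// pageable h_buffer sink; make_source_info() then stages those bytes into the
// pinned_buffer member, so the timed read consumes pinned host memory.
#include <cudf/io/csv.hpp>
#include <cudf/table/table_view.hpp>

void pinned_input_roundtrip(cudf::table_view const& tbl_view)
{
  cuio_source_sink_pair source_sink(io_type::PINNED_BUFFER);

  auto const write_opts =
    cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), tbl_view).build();
  cudf::io::write_csv(write_opts);

  auto const read_opts =
    cudf::io::csv_reader_options::builder(source_sink.make_source_info()).build();
  auto const result = cudf::io::read_csv(read_opts);  // the part a benchmark would time
}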
14 changes: 6 additions & 8 deletions cpp/benchmarks/io/json/json_reader_input.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -70,7 +70,7 @@ cudf::size_type json_write_bm_data(cudf::io::sink_info sink,
return view.num_rows();
}

-template <cudf::io::io_type IO>
+template <io_type IO>
void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
{
cuio_source_sink_pair source_sink(IO);
@@ -87,7 +87,7 @@ void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_typ
json_read_common(source_sink, num_rows, state);
}

-template <data_type DataType, cudf::io::io_type IO>
+template <data_type DataType, io_type IO>
void BM_json_read_data_type(
nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
{
@@ -107,16 +107,14 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::LIST,
data_type::STRUCT>;

-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-cudf::io::io_type::HOST_BUFFER,
-cudf::io::io_type::DEVICE_BUFFER>;
+using io_list =
+nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;

using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_json_read_data_type,
-NVBENCH_TYPE_AXES(d_type_list,
-nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
+NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
.set_name("json_read_data_type")
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4);
9 changes: 4 additions & 5 deletions cpp/benchmarks/io/json/json_writer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -52,7 +52,7 @@ void json_write_common(cudf::io::json_writer_options const& write_opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

-template <cudf::io::io_type IO>
+template <io_type IO>
void BM_json_write_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
@@ -114,9 +114,8 @@ void BM_json_writer_options(nvbench::state& state)
json_write_common(write_opts, source_sink, data_size, state);
}

-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-cudf::io::io_type::HOST_BUFFER,
-cudf::io::io_type::DEVICE_BUFFER>;
+using io_list =
+nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;

NVBENCH_BENCH_TYPES(BM_json_write_io, NVBENCH_TYPE_AXES(io_list))
.set_name("json_write_io")
11 changes: 6 additions & 5 deletions cpp/benchmarks/io/nvbench_helpers.hpp
@@ -56,13 +56,14 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
[](auto) { return std::string{}; })

NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
-cudf::io::io_type,
+io_type,
[](auto value) {
switch (value) {
-case cudf::io::io_type::FILEPATH: return "FILEPATH";
-case cudf::io::io_type::HOST_BUFFER: return "HOST_BUFFER";
-case cudf::io::io_type::DEVICE_BUFFER: return "DEVICE_BUFFER";
-case cudf::io::io_type::VOID: return "VOID";
+case io_type::FILEPATH: return "FILEPATH";
+case io_type::HOST_BUFFER: return "HOST_BUFFER";
+case io_type::PINNED_BUFFER: return "PINNED_BUFFER";
+case io_type::DEVICE_BUFFER: return "DEVICE_BUFFER";
+case io_type::VOID: return "VOID";
default: return "Unknown";
}
},
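Note that the io_list declarations in this diff only drop the cudf::io:: prefix; none of the compile-time axes shown here add the new value. A benchmark that wanted to sweep pinned input on its type axis could opt in roughly as follows, reusing BM_csv_read_io from above (illustrative only, not something this diff does):

// Illustrative sketch (not part of this PR): include PINNED_BUFFER in a type axis.
using io_list_with_pinned = nvbench::enum_type_list<io_type::FILEPATH,
                                                    io_type::HOST_BUFFER,
                                                    io_type::PINNED_BUFFER,
                                                    io_type::DEVICE_BUFFER>;

NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list_with_pinned))
  .set_name("csv_read_io")
  .set_type_axes_names({"io"})
  .set_min_samples(4);

Because the NVBENCH_DECLARE_ENUM_TYPE_STRINGS specialization above now covers PINNED_BUFFER, the extra axis value would render as "PINNED_BUFFER" in the benchmark output.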
16 changes: 7 additions & 9 deletions cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -87,7 +87,7 @@ void orc_read_common(cudf::size_type num_rows_to_read,

} // namespace

-template <data_type DataType, cudf::io::io_type IOType>
+template <data_type DataType, io_type IOType>
void BM_orc_read_data(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
@@ -112,7 +112,7 @@ void BM_orc_read_data(nvbench::state& state,
orc_read_common<false>(num_rows_written, source_sink, state);
}

-template <cudf::io::io_type IOType, cudf::io::compression_type Compression, bool chunked_read>
+template <io_type IOType, cudf::io::compression_type Compression, bool chunked_read>
void orc_read_io_compression(nvbench::state& state)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL_SIGNED),
@@ -150,7 +150,7 @@ void orc_read_io_compression(nvbench::state& state)
orc_read_common<chunked_read>(num_rows_written, source_sink, state);
}

-template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
+template <io_type IOType, cudf::io::compression_type Compression>
void BM_orc_read_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
@@ -163,7 +163,7 @@ void BM_orc_chunked_read_io_compression(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<Compression>>)
{
// Only run benchmark using HOST_BUFFER IO.
-return orc_read_io_compression<cudf::io::io_type::HOST_BUFFER, Compression, true>(state);
+return orc_read_io_compression<io_type::HOST_BUFFER, Compression, true>(state);
}

using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
@@ -174,16 +174,14 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
data_type::LIST,
data_type::STRUCT>;

-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-cudf::io::io_type::HOST_BUFFER,
-cudf::io::io_type::DEVICE_BUFFER>;
+using io_list =
+nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;

using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_orc_read_data,
-NVBENCH_TYPE_AXES(d_type_list,
-nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
+NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
.set_name("orc_read_decode")
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4)
8 changes: 3 additions & 5 deletions cpp/benchmarks/io/orc/orc_writer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -82,7 +82,7 @@ void BM_orc_write_encode(nvbench::state& state, nvbench::type_list<nvbench::enum
state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size");
}

-template <cudf::io::io_type IO, cudf::io::compression_type Compression>
+template <io_type IO, cudf::io::compression_type Compression>
void BM_orc_write_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<Compression>>)
@@ -183,9 +183,7 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
data_type::LIST,
data_type::STRUCT>;

-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-cudf::io::io_type::HOST_BUFFER,
-cudf::io::io_type::VOID>;
+using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::VOID>;

using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;
16 changes: 11 additions & 5 deletions cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
@@ -62,7 +62,7 @@ std::tuple<std::vector<cuio_source_sink_pair>, size_t, size_t> write_file_data(
size_t total_file_size = 0;

for (size_t i = 0; i < num_files; ++i) {
-cuio_source_sink_pair source_sink{cudf::io::io_type::HOST_BUFFER};
+cuio_source_sink_pair source_sink{io_type::HOST_BUFFER};

auto const tbl = create_random_table(
cycle_dtypes(d_types, num_cols),
@@ -96,6 +96,10 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state,
cudf::detail::thread_pool threads(num_threads);

auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types);
+std::vector<cudf::io::source_info> source_info_vector;
+for (auto& source_sink : source_sink_vector) {
+source_info_vector.push_back(source_sink.make_source_info());
+}

auto mem_stats_logger = cudf::memory_stats_logger();

[&](nvbench::launch& launch, auto& timer) {
auto read_func = [&](int index) {
auto const stream = streams[index % num_threads];
-auto& source_sink = source_sink_vector[index];
cudf::io::parquet_reader_options read_opts =
-cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
+cudf::io::parquet_reader_options::builder(source_info_vector[index]);
cudf::io::read_parquet(read_opts, stream, rmm::mr::get_current_device_resource());
};

@@ -174,6 +177,10 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state,
auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
cudf::detail::thread_pool threads(num_threads);
auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types);
+std::vector<cudf::io::source_info> source_info_vector;
+for (auto& source_sink : source_sink_vector) {
+source_info_vector.push_back(source_sink.make_source_info());
+}

auto mem_stats_logger = cudf::memory_stats_logger();

[&](nvbench::launch& launch, auto& timer) {
auto read_func = [&](int index) {
auto const stream = streams[index % num_threads];
-auto& source_sink = source_sink_vector[index];
cudf::io::parquet_reader_options read_opts =
-cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
+cudf::io::parquet_reader_options::builder(source_info_vector[index]);
// divide chunk limits by number of threads so the number of chunks produced is the
// same for all cases. this seems better than the alternative, which is to keep the
// limits the same. if we do that, as the number of threads goes up, the number of
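A side note on the two multithreaded Parquet hunks above: the source infos are now built once into source_info_vector before the timed region, and the per-thread read lambdas index into that vector instead of calling make_source_info() inside the loop, which keeps source construction out of the measured read time.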