From 6090626f6e35a585cc4afcc56601253131c5afd3 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 26 Oct 2022 12:10:24 +0000 Subject: [PATCH] add datasource to multibyte_split benchmark --- cpp/benchmarks/io/text/multibyte_split.cpp | 23 +++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index b7e85d8aa7e..6edae80e79e 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -45,7 +45,7 @@ temp_directory const temp_dir("cudf_nvbench"); -enum class data_chunk_source_type { device, file, host, host_pinned, file_bgzip }; +enum class data_chunk_source_type { device, file, file_datasource, host, host_pinned, file_bgzip }; NVBENCH_DECLARE_ENUM_TYPE_STRINGS( data_chunk_source_type, @@ -53,6 +53,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS( switch (value) { case data_chunk_source_type::device: return "device"; case data_chunk_source_type::file: return "file"; + case data_chunk_source_type::file_datasource: return "file_datasource"; case data_chunk_source_type::host: return "host"; case data_chunk_source_type::host_pinned: return "host_pinned"; case data_chunk_source_type::file_bgzip: return "file_bgzip"; @@ -133,13 +134,14 @@ static void bench_multibyte_split(nvbench::state& state, std::iota(delim.begin(), delim.end(), '1'); auto const delim_factor = static_cast(delim_percent) / 100; - auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); - auto host_input = std::vector{}; + std::unique_ptr datasource; + auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); + auto host_input = std::vector{}; auto host_pinned_input = thrust::host_vector>{}; - if (source_type == data_chunk_source_type::host || source_type == data_chunk_source_type::file || - source_type == data_chunk_source_type::file_bgzip) { + if (source_type != data_chunk_source_type::device && + source_type != data_chunk_source_type::host_pinned) { host_input = cudf::detail::make_std_vector_sync( {device_input.data(), static_cast(device_input.size())}, cudf::default_stream_value); @@ -154,11 +156,17 @@ static void bench_multibyte_split(nvbench::state& state, auto source = [&] { switch (source_type) { - case data_chunk_source_type::file: { + case data_chunk_source_type::file: + case data_chunk_source_type::file_datasource: { auto const temp_file_name = random_file_in_dir(temp_dir.path()); std::ofstream(temp_file_name, std::ofstream::out) .write(host_input.data(), host_input.size()); - return cudf::io::text::make_source_from_file(temp_file_name); + if (source_type == data_chunk_source_type::file) { + return cudf::io::text::make_source_from_file(temp_file_name); + } else { + datasource = cudf::io::datasource::create(temp_file_name); + return cudf::io::text::make_source(*datasource); + } } case data_chunk_source_type::host: // return cudf::io::text::make_source(host_input); @@ -197,6 +205,7 @@ static void bench_multibyte_split(nvbench::state& state, using source_type_list = nvbench::enum_type_list;