From 0ec95d68ef66fe92e79d155472cb69cf6bae0399 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Mon, 30 Oct 2023 15:52:40 -0700 Subject: [PATCH 1/9] use string axis for io type in BM_parquet_read_data Signed-off-by: Suraj Aralihalli --- cpp/benchmarks/io/cuio_common.cpp | 44 +++++++++++++++++++ .../io/parquet/parquet_reader_input.cpp | 14 +++--- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index b1aaef41340..a55d2d1b489 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -201,3 +201,47 @@ void try_drop_l3_cache() [](auto& cmd) { return exec_cmd(cmd).empty(); }), "Failed to execute the drop cache command"); } + +cudf::io::io_type retrieve_io_type_enum(std::string const& io_string) +{ + if (io_string == "FILEPATH") { + return cudf::io::io_type::FILEPATH; + } else if (io_string == "HOST_BUFFER") { + return cudf::io::io_type::HOST_BUFFER; + } else if (io_string == "DEVICE_BUFFER") { + return cudf::io::io_type::DEVICE_BUFFER; + } else if (io_string == "VOID") { + return cudf::io::io_type::VOID; + } else if (io_string == "USER_IMPLEMENTED") { + return cudf::io::io_type::USER_IMPLEMENTED; + } +} + +cudf::io::compression_type retrieve_compression_type_enum(std::string const& compression_string) +{ + if (compression_string == "NONE") { + return cudf::io::compression_type::NONE; + } else if (compression_string == "AUTO") { + return cudf::io::compression_type::AUTO; + } else if (compression_string == "SNAPPY") { + return cudf::io::compression_type::SNAPPY; + } else if (compression_string == "GZIP") { + return cudf::io::compression_type::GZIP; + } else if (compression_string == "BZIP2") { + return cudf::io::compression_type::BZIP2; + } else if (compression_string == "BROTLI") { + return cudf::io::compression_type::BROTLI; + } else if (compression_string == "ZIP") { + return cudf::io::compression_type::ZIP; + } else if (compression_string == "XZ") { + return cudf::io::compression_type::XZ; + } else if (compression_string == "ZLIB") { + return cudf::io::compression_type::ZLIB; + } else if (compression_string == "LZ4") { + return cudf::io::compression_type::LZ4; + } else if (compression_string == "LZO") { + return cudf::io::compression_type::LZO; + } else if (compression_string == "ZSTD") { + return cudf::io::compression_type::ZSTD; + } +} diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 80303ea04af..288daf1d6d7 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -56,14 +56,13 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template -void BM_parquet_read_data( - nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +template +void BM_parquet_read_data(nvbench::state& state, nvbench::type_list < nvbench::enum_type) { auto const d_type = get_type_or_group(static_cast(DataType)); cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); + cudf::io::io_type const io_type = retrieve_io_type_enum(state.get_string("io_type")); auto const compression = cudf::io::compression_type::SNAPPY; auto const tbl = @@ -72,7 +71,7 @@ void BM_parquet_read_data( data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); auto const view = tbl->view(); - cuio_source_sink_pair source_sink(IOType); + cuio_source_sink_pair source_sink(io_type); cudf::io::parquet_writer_options write_opts = cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) .compression(compression); @@ -225,12 +224,11 @@ NVBENCH_BENCH_TYPES(BM_parquet_read_io_compression, NVBENCH_TYPE_AXES(io_list, c .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); -NVBENCH_BENCH_TYPES(BM_parquet_read_chunks, - NVBENCH_TYPE_AXES(d_type_list, - nvbench::enum_type_list)) +NVBENCH_BENCH_TYPES(BM_parquet_read_chunks, NVBENCH_TYPE_AXES(d_type_list)) .set_name("parquet_read_chunks") .set_type_axes_names({"data_type", "io"}) .set_min_samples(4) + .add_string_axis("io_type", {"DEVICE_BUFFER"}) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}) .add_int64_axis("byte_limit", {0, 500'000}); From b9f911d8a9ccebeba76fc6c6fc27a4e13da52b8c Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Mon, 30 Oct 2023 16:20:18 -0700 Subject: [PATCH 2/9] update header file Signed-off-by: Suraj Aralihalli --- cpp/benchmarks/io/cuio_common.cpp | 4 ++++ cpp/benchmarks/io/cuio_common.hpp | 24 +++++++++++++++++++ .../io/parquet/parquet_reader_input.cpp | 10 ++++---- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index a55d2d1b489..c294a64c3f2 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -214,6 +214,8 @@ cudf::io::io_type retrieve_io_type_enum(std::string const& io_string) return cudf::io::io_type::VOID; } else if (io_string == "USER_IMPLEMENTED") { return cudf::io::io_type::USER_IMPLEMENTED; + } else { + return cudf::io::io_type::VOID; } } @@ -243,5 +245,7 @@ cudf::io::compression_type retrieve_compression_type_enum(std::string const& com return cudf::io::compression_type::LZO; } else if (compression_string == "ZSTD") { return cudf::io::compression_type::ZSTD; + } else { + return cudf::io::compression_type::NONE; } } diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 34adae30505..0c6ea9b4f03 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -138,3 +138,27 @@ std::vector segments_in_chunk(int num_segments, int num_chunks, * @throw cudf::logic_error if the environment variable is set and the command fails */ void try_drop_l3_cache(); + +/** + * @brief Convert a string to the corresponding io_type enum value. + * + * This function takes a string and returns the matching io_type enum value. It allows you to + * convert a string representation of an io_type into its corresponding enum value. + * + * @param io_string The input string representing the io_type + * + * @return The io_type enum value + */ +cudf::io::io_type retrieve_io_type_enum(std::string const& io_string); + +/** + * @brief Convert a string to the corresponding compression_type enum value. + * + * This function takes a string and returns the matching compression_type enum value. It allows you + * to convert a string representation of a compression_type into its corresponding enum value. + * + * @param compression_string The input string representing the compression_type + * + * @return The compression_type enum value + */ +cudf::io::compression_type retrieve_compression_type_enum(std::string const& compression_string); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 288daf1d6d7..346abb9836e 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -57,7 +57,7 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts, } template -void BM_parquet_read_data(nvbench::state& state, nvbench::type_list < nvbench::enum_type) +void BM_parquet_read_data(nvbench::state& state, nvbench::type_list>) { auto const d_type = get_type_or_group(static_cast(DataType)); cudf::size_type const cardinality = state.get_int64("cardinality"); @@ -208,9 +208,7 @@ using io_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(BM_parquet_read_data, - NVBENCH_TYPE_AXES(d_type_list, - nvbench::enum_type_list)) +NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list)) .set_name("parquet_read_decode") .set_type_axes_names({"data_type", "io"}) .set_min_samples(4) @@ -224,7 +222,9 @@ NVBENCH_BENCH_TYPES(BM_parquet_read_io_compression, NVBENCH_TYPE_AXES(io_list, c .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); -NVBENCH_BENCH_TYPES(BM_parquet_read_chunks, NVBENCH_TYPE_AXES(d_type_list)) +NVBENCH_BENCH_TYPES(BM_parquet_read_chunks, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) .set_name("parquet_read_chunks") .set_type_axes_names({"data_type", "io"}) .set_min_samples(4) From 9206a346e2fd39dbaa550caff2662fc64ec082a2 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Tue, 31 Oct 2023 00:05:07 -0700 Subject: [PATCH 3/9] make BM_parquet_read_data working Signed-off-by: Suraj Aralihalli --- cpp/benchmarks/io/parquet/parquet_reader_input.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 346abb9836e..87c48d3c2eb 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -210,7 +210,8 @@ using compression_list = NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list)) .set_name("parquet_read_decode") - .set_type_axes_names({"data_type", "io"}) + .set_type_axes_names({"data_type"}) + .add_string_axis("io_type", {"DEVICE_BUFFER"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); @@ -228,7 +229,6 @@ NVBENCH_BENCH_TYPES(BM_parquet_read_chunks, .set_name("parquet_read_chunks") .set_type_axes_names({"data_type", "io"}) .set_min_samples(4) - .add_string_axis("io_type", {"DEVICE_BUFFER"}) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}) .add_int64_axis("byte_limit", {0, 500'000}); From ec0fe551c721b7a1f9d3fccab6d804add828e14f Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Tue, 31 Oct 2023 00:35:16 -0700 Subject: [PATCH 4/9] add string axis type to all benchmarks in parquet_reader_input.cpp Signed-off-by: Suraj Aralihalli --- .../io/parquet/parquet_reader_input.cpp | 80 ++++++++----------- 1 file changed, 33 insertions(+), 47 deletions(-) diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 87c48d3c2eb..d2b00507095 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -59,11 +59,11 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts, template void BM_parquet_read_data(nvbench::state& state, nvbench::type_list>) { - auto const d_type = get_type_or_group(static_cast(DataType)); - cudf::size_type const cardinality = state.get_int64("cardinality"); - cudf::size_type const run_length = state.get_int64("run_length"); - cudf::io::io_type const io_type = retrieve_io_type_enum(state.get_string("io_type")); - auto const compression = cudf::io::compression_type::SNAPPY; + auto const d_type = get_type_or_group(static_cast(DataType)); + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); + auto const compression = cudf::io::compression_type::SNAPPY; auto const tbl = create_random_table(cycle_dtypes(d_type, num_cols), @@ -71,7 +71,7 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_listview(); - cuio_source_sink_pair source_sink(io_type); + cuio_source_sink_pair source_sink(source_type); cudf::io::parquet_writer_options write_opts = cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) .compression(compression); @@ -79,10 +79,7 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_list -void BM_parquet_read_io_compression( - nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +void BM_parquet_read_io_compression(nvbench::state& state) { auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), static_cast(data_type::FLOAT), @@ -93,10 +90,11 @@ void BM_parquet_read_io_compression( static_cast(data_type::LIST), static_cast(data_type::STRUCT)}); - cudf::size_type const cardinality = state.get_int64("cardinality"); - cudf::size_type const run_length = state.get_int64("run_length"); - auto const compression = Compression; - auto const source_type = IOType; + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); + cudf::io::compression_type const compression = + retrieve_compression_type_enum(state.get_string("compression_type")); auto const tbl = create_random_table(cycle_dtypes(d_type, num_cols), @@ -112,17 +110,15 @@ void BM_parquet_read_io_compression( parquet_read_common(write_opts, source_sink, state); } -template -void BM_parquet_read_io_small_mixed(nvbench::state& state, - nvbench::type_list>) +void BM_parquet_read_io_small_mixed(nvbench::state& state) { auto const d_type = std::pair{cudf::type_id::STRING, cudf::type_id::INT32}; - cudf::size_type const cardinality = state.get_int64("cardinality"); - cudf::size_type const run_length = state.get_int64("run_length"); - cudf::size_type const num_strings = state.get_int64("num_string_cols"); - auto const source_type = IOType; + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + cudf::size_type const num_strings = state.get_int64("num_string_cols"); + cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); // want 80 pages total, across 4 columns, so 20 pages per column cudf::size_type constexpr n_col = 4; @@ -144,16 +140,15 @@ void BM_parquet_read_io_small_mixed(nvbench::state& state, parquet_read_common(write_opts, source_sink, state); } -template -void BM_parquet_read_chunks( - nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +template +void BM_parquet_read_chunks(nvbench::state& state, nvbench::type_list>) { - auto const d_type = get_type_or_group(static_cast(DataType)); - cudf::size_type const cardinality = state.get_int64("cardinality"); - cudf::size_type const run_length = state.get_int64("run_length"); - cudf::size_type const byte_limit = state.get_int64("byte_limit"); - auto const compression = cudf::io::compression_type::SNAPPY; + auto const d_type = get_type_or_group(static_cast(DataType)); + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + cudf::size_type const byte_limit = state.get_int64("byte_limit"); + cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); + auto const compression = cudf::io::compression_type::SNAPPY; auto const tbl = create_random_table(cycle_dtypes(d_type, num_cols), @@ -161,7 +156,7 @@ void BM_parquet_read_chunks( data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); auto const view = tbl->view(); - cuio_source_sink_pair source_sink(IOType); + cuio_source_sink_pair source_sink(source_type); cudf::io::parquet_writer_options write_opts = cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) .compression(compression); @@ -201,13 +196,6 @@ using d_type_list = nvbench::enum_type_list; -using io_list = nvbench::enum_type_list; - -using compression_list = - nvbench::enum_type_list; - NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list)) .set_name("parquet_read_decode") .set_type_axes_names({"data_type"}) @@ -216,27 +204,25 @@ NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list)) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); -NVBENCH_BENCH_TYPES(BM_parquet_read_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list)) +NVBENCH_BENCH(BM_parquet_read_io_compression) .set_name("parquet_read_io_compression") - .set_type_axes_names({"io", "compression"}) + .add_string_axis("io_type", {"FILEPATH", "HOST_BUFFER", "DEVICE_BUFFER"}) + .add_string_axis("compression_type", {"SNAPPY", "NONE"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); -NVBENCH_BENCH_TYPES(BM_parquet_read_chunks, - NVBENCH_TYPE_AXES(d_type_list, - nvbench::enum_type_list)) +NVBENCH_BENCH_TYPES(BM_parquet_read_chunks, NVBENCH_TYPE_AXES(d_type_list)) .set_name("parquet_read_chunks") - .set_type_axes_names({"data_type", "io"}) + .add_string_axis("io_type", {"DEVICE_BUFFER"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}) .add_int64_axis("byte_limit", {0, 500'000}); -NVBENCH_BENCH_TYPES(BM_parquet_read_io_small_mixed, - NVBENCH_TYPE_AXES(nvbench::enum_type_list)) +NVBENCH_BENCH(BM_parquet_read_io_small_mixed) .set_name("parquet_read_io_small_mixed") - .set_type_axes_names({"io"}) + .add_string_axis("io_type", {"FILEPATH"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}) From 6783527ae7f6bb68afeb5612f1e507ed1fbf7a66 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Tue, 14 Nov 2023 14:52:31 -0800 Subject: [PATCH 5/9] cuda fail when incorrect io/compression type is used Signed-off-by: Suraj Aralihalli --- cpp/benchmarks/io/cuio_common.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index c294a64c3f2..04553e0c731 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -215,7 +215,7 @@ cudf::io::io_type retrieve_io_type_enum(std::string const& io_string) } else if (io_string == "USER_IMPLEMENTED") { return cudf::io::io_type::USER_IMPLEMENTED; } else { - return cudf::io::io_type::VOID; + CUDF_FAIL("Unsupported io_type."); } } @@ -246,6 +246,6 @@ cudf::io::compression_type retrieve_compression_type_enum(std::string const& com } else if (compression_string == "ZSTD") { return cudf::io::compression_type::ZSTD; } else { - return cudf::io::compression_type::NONE; + CUDF_FAIL("Unsupported compression_type."); } } From 9d2ecc791f385823bcdb27a2a1734b1a43053c6d Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Fri, 17 Nov 2023 11:13:44 -0800 Subject: [PATCH 6/9] use string_view Signed-off-by: Suraj Aralihalli --- cpp/benchmarks/io/cuio_common.cpp | 4 ++-- cpp/benchmarks/io/cuio_common.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 04553e0c731..2173373265f 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -202,7 +202,7 @@ void try_drop_l3_cache() "Failed to execute the drop cache command"); } -cudf::io::io_type retrieve_io_type_enum(std::string const& io_string) +cudf::io::io_type retrieve_io_type_enum(std::string_view io_string) { if (io_string == "FILEPATH") { return cudf::io::io_type::FILEPATH; @@ -219,7 +219,7 @@ cudf::io::io_type retrieve_io_type_enum(std::string const& io_string) } } -cudf::io::compression_type retrieve_compression_type_enum(std::string const& compression_string) +cudf::io::compression_type retrieve_compression_type_enum(std::string_view compression_string) { if (compression_string == "NONE") { return cudf::io::compression_type::NONE; diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 0c6ea9b4f03..fe509f196be 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -149,7 +149,7 @@ void try_drop_l3_cache(); * * @return The io_type enum value */ -cudf::io::io_type retrieve_io_type_enum(std::string const& io_string); +cudf::io::io_type retrieve_io_type_enum(std::string_view io_string); /** * @brief Convert a string to the corresponding compression_type enum value. @@ -161,4 +161,4 @@ cudf::io::io_type retrieve_io_type_enum(std::string const& io_string); * * @return The compression_type enum value */ -cudf::io::compression_type retrieve_compression_type_enum(std::string const& compression_string); +cudf::io::compression_type retrieve_compression_type_enum(std::string_view compression_string); From 3b593248eb5b9e91fc66d437a2260ce72405f8ea Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Fri, 8 Dec 2023 02:46:55 -0800 Subject: [PATCH 7/9] else if -> if Signed-off-by: Suraj Aralihalli --- cpp/benchmarks/io/cuio_common.cpp | 59 ++++++++++--------------------- 1 file changed, 19 insertions(+), 40 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 2173373265f..943b329a364 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -204,48 +204,27 @@ void try_drop_l3_cache() cudf::io::io_type retrieve_io_type_enum(std::string_view io_string) { - if (io_string == "FILEPATH") { - return cudf::io::io_type::FILEPATH; - } else if (io_string == "HOST_BUFFER") { - return cudf::io::io_type::HOST_BUFFER; - } else if (io_string == "DEVICE_BUFFER") { - return cudf::io::io_type::DEVICE_BUFFER; - } else if (io_string == "VOID") { - return cudf::io::io_type::VOID; - } else if (io_string == "USER_IMPLEMENTED") { - return cudf::io::io_type::USER_IMPLEMENTED; - } else { - CUDF_FAIL("Unsupported io_type."); - } + if (io_string == "FILEPATH") { return cudf::io::io_type::FILEPATH; } + if (io_string == "HOST_BUFFER") { return cudf::io::io_type::HOST_BUFFER; } + if (io_string == "DEVICE_BUFFER") { return cudf::io::io_type::DEVICE_BUFFER; } + if (io_string == "VOID") { return cudf::io::io_type::VOID; } + if (io_string == "USER_IMPLEMENTED") { return cudf::io::io_type::USER_IMPLEMENTED; } + CUDF_FAIL("Unsupported io_type."); } cudf::io::compression_type retrieve_compression_type_enum(std::string_view compression_string) { - if (compression_string == "NONE") { - return cudf::io::compression_type::NONE; - } else if (compression_string == "AUTO") { - return cudf::io::compression_type::AUTO; - } else if (compression_string == "SNAPPY") { - return cudf::io::compression_type::SNAPPY; - } else if (compression_string == "GZIP") { - return cudf::io::compression_type::GZIP; - } else if (compression_string == "BZIP2") { - return cudf::io::compression_type::BZIP2; - } else if (compression_string == "BROTLI") { - return cudf::io::compression_type::BROTLI; - } else if (compression_string == "ZIP") { - return cudf::io::compression_type::ZIP; - } else if (compression_string == "XZ") { - return cudf::io::compression_type::XZ; - } else if (compression_string == "ZLIB") { - return cudf::io::compression_type::ZLIB; - } else if (compression_string == "LZ4") { - return cudf::io::compression_type::LZ4; - } else if (compression_string == "LZO") { - return cudf::io::compression_type::LZO; - } else if (compression_string == "ZSTD") { - return cudf::io::compression_type::ZSTD; - } else { - CUDF_FAIL("Unsupported compression_type."); - } + if (compression_string == "NONE") { return cudf::io::compression_type::NONE; } + if (compression_string == "AUTO") { return cudf::io::compression_type::AUTO; } + if (compression_string == "SNAPPY") { return cudf::io::compression_type::SNAPPY; } + if (compression_string == "GZIP") { return cudf::io::compression_type::GZIP; } + if (compression_string == "BZIP2") { return cudf::io::compression_type::BZIP2; } + if (compression_string == "BROTLI") { return cudf::io::compression_type::BROTLI; } + if (compression_string == "ZIP") { return cudf::io::compression_type::ZIP; } + if (compression_string == "XZ") { return cudf::io::compression_type::XZ; } + if (compression_string == "ZLIB") { return cudf::io::compression_type::ZLIB; } + if (compression_string == "LZ4") { return cudf::io::compression_type::LZ4; } + if (compression_string == "LZO") { return cudf::io::compression_type::LZO; } + if (compression_string == "ZSTD") { return cudf::io::compression_type::ZSTD; } + CUDF_FAIL("Unsupported compression_type."); } From c4ab7a75677dd864fee6184806dd5fc83dc25a3a Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Fri, 8 Dec 2023 07:02:15 -0800 Subject: [PATCH 8/9] static cast int64 to int32 Signed-off-by: Suraj Aralihalli --- .../io/parquet/parquet_reader_input.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index d2b00507095..3ed35f6cb20 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -59,9 +59,9 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts, template void BM_parquet_read_data(nvbench::state& state, nvbench::type_list>) { - auto const d_type = get_type_or_group(static_cast(DataType)); - cudf::size_type const cardinality = state.get_int64("cardinality"); - cudf::size_type const run_length = state.get_int64("run_length"); + auto const d_type = get_type_or_group(static_cast(DataType)); + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const run_length = static_cast(state.get_int64("run_length")); cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); auto const compression = cudf::io::compression_type::SNAPPY; @@ -90,8 +90,8 @@ void BM_parquet_read_io_compression(nvbench::state& state) static_cast(data_type::LIST), static_cast(data_type::STRUCT)}); - cudf::size_type const cardinality = state.get_int64("cardinality"); - cudf::size_type const run_length = state.get_int64("run_length"); + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const run_length = static_cast(state.get_int64("run_length")); cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); cudf::io::compression_type const compression = retrieve_compression_type_enum(state.get_string("compression_type")); @@ -115,9 +115,9 @@ void BM_parquet_read_io_small_mixed(nvbench::state& state) auto const d_type = std::pair{cudf::type_id::STRING, cudf::type_id::INT32}; - cudf::size_type const cardinality = state.get_int64("cardinality"); - cudf::size_type const run_length = state.get_int64("run_length"); - cudf::size_type const num_strings = state.get_int64("num_string_cols"); + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const run_length = static_cast(state.get_int64("run_length")); + auto const num_strings = static_cast(state.get_int64("num_string_cols")); cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); // want 80 pages total, across 4 columns, so 20 pages per column @@ -143,10 +143,10 @@ void BM_parquet_read_io_small_mixed(nvbench::state& state) template void BM_parquet_read_chunks(nvbench::state& state, nvbench::type_list>) { - auto const d_type = get_type_or_group(static_cast(DataType)); - cudf::size_type const cardinality = state.get_int64("cardinality"); - cudf::size_type const run_length = state.get_int64("run_length"); - cudf::size_type const byte_limit = state.get_int64("byte_limit"); + auto const d_type = get_type_or_group(static_cast(DataType)); + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const run_length = static_cast(state.get_int64("run_length")); + auto const byte_limit = static_cast(state.get_int64("byte_limit")); cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); auto const compression = cudf::io::compression_type::SNAPPY; From 585c328d30a0e2cfe75b099f01890fc4f58430df Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Fri, 8 Dec 2023 09:17:57 -0800 Subject: [PATCH 9/9] use auto Signed-off-by: Suraj Aralihalli --- .../io/parquet/parquet_reader_input.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 3ed35f6cb20..6db147cbfef 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -62,8 +62,8 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_list(DataType)); auto const cardinality = static_cast(state.get_int64("cardinality")); auto const run_length = static_cast(state.get_int64("run_length")); - cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); - auto const compression = cudf::io::compression_type::SNAPPY; + auto const source_type = retrieve_io_type_enum(state.get_string("io_type")); + auto const compression = cudf::io::compression_type::SNAPPY; auto const tbl = create_random_table(cycle_dtypes(d_type, num_cols), @@ -92,9 +92,8 @@ void BM_parquet_read_io_compression(nvbench::state& state) auto const cardinality = static_cast(state.get_int64("cardinality")); auto const run_length = static_cast(state.get_int64("run_length")); - cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); - cudf::io::compression_type const compression = - retrieve_compression_type_enum(state.get_string("compression_type")); + auto const source_type = retrieve_io_type_enum(state.get_string("io_type")); + auto const compression = retrieve_compression_type_enum(state.get_string("compression_type")); auto const tbl = create_random_table(cycle_dtypes(d_type, num_cols), @@ -118,7 +117,7 @@ void BM_parquet_read_io_small_mixed(nvbench::state& state) auto const cardinality = static_cast(state.get_int64("cardinality")); auto const run_length = static_cast(state.get_int64("run_length")); auto const num_strings = static_cast(state.get_int64("num_string_cols")); - cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); + auto const source_type = retrieve_io_type_enum(state.get_string("io_type")); // want 80 pages total, across 4 columns, so 20 pages per column cudf::size_type constexpr n_col = 4; @@ -147,8 +146,8 @@ void BM_parquet_read_chunks(nvbench::state& state, nvbench::type_list(state.get_int64("cardinality")); auto const run_length = static_cast(state.get_int64("run_length")); auto const byte_limit = static_cast(state.get_int64("byte_limit")); - cudf::io::io_type const source_type = retrieve_io_type_enum(state.get_string("io_type")); - auto const compression = cudf::io::compression_type::SNAPPY; + auto const source_type = retrieve_io_type_enum(state.get_string("io_type")); + auto const compression = cudf::io::compression_type::SNAPPY; auto const tbl = create_random_table(cycle_dtypes(d_type, num_cols),