diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp
index b1aaef41340..943b329a364 100644
--- a/cpp/benchmarks/io/cuio_common.cpp
+++ b/cpp/benchmarks/io/cuio_common.cpp
@@ -201,3 +201,30 @@ void try_drop_l3_cache()
                            [](auto& cmd) { return exec_cmd(cmd).empty(); }),
                "Failed to execute the drop cache command");
 }
+
+cudf::io::io_type retrieve_io_type_enum(std::string_view io_string)
+{
+  if (io_string == "FILEPATH") { return cudf::io::io_type::FILEPATH; }
+  if (io_string == "HOST_BUFFER") { return cudf::io::io_type::HOST_BUFFER; }
+  if (io_string == "DEVICE_BUFFER") { return cudf::io::io_type::DEVICE_BUFFER; }
+  if (io_string == "VOID") { return cudf::io::io_type::VOID; }
+  if (io_string == "USER_IMPLEMENTED") { return cudf::io::io_type::USER_IMPLEMENTED; }
+  CUDF_FAIL("Unsupported io_type.");
+}
+
+cudf::io::compression_type retrieve_compression_type_enum(std::string_view compression_string)
+{
+  if (compression_string == "NONE") { return cudf::io::compression_type::NONE; }
+  if (compression_string == "AUTO") { return cudf::io::compression_type::AUTO; }
+  if (compression_string == "SNAPPY") { return cudf::io::compression_type::SNAPPY; }
+  if (compression_string == "GZIP") { return cudf::io::compression_type::GZIP; }
+  if (compression_string == "BZIP2") { return cudf::io::compression_type::BZIP2; }
+  if (compression_string == "BROTLI") { return cudf::io::compression_type::BROTLI; }
+  if (compression_string == "ZIP") { return cudf::io::compression_type::ZIP; }
+  if (compression_string == "XZ") { return cudf::io::compression_type::XZ; }
+  if (compression_string == "ZLIB") { return cudf::io::compression_type::ZLIB; }
+  if (compression_string == "LZ4") { return cudf::io::compression_type::LZ4; }
+  if (compression_string == "LZO") { return cudf::io::compression_type::LZO; }
+  if (compression_string == "ZSTD") { return cudf::io::compression_type::ZSTD; }
+  CUDF_FAIL("Unsupported compression_type.");
+}
diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp
index 34adae30505..fe509f196be 100644
--- a/cpp/benchmarks/io/cuio_common.hpp
+++ b/cpp/benchmarks/io/cuio_common.hpp
@@ -138,3 +138,27 @@ std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks,
  * @throw cudf::logic_error if the environment variable is set and the command fails
  */
 void try_drop_l3_cache();
+
+/**
+ * @brief Convert a string to the corresponding io_type enum value.
+ *
+ * This function takes a string and returns the matching io_type enum value. It allows you to
+ * convert a string representation of an io_type into its corresponding enum value.
+ *
+ * @param io_string The input string representing the io_type
+ *
+ * @return The io_type enum value
+ */
+cudf::io::io_type retrieve_io_type_enum(std::string_view io_string);
+
+/**
+ * @brief Convert a string to the corresponding compression_type enum value.
+ *
+ * This function takes a string and returns the matching compression_type enum value. It allows you
+ * to convert a string representation of a compression_type into its corresponding enum value.
+ *
+ * @param compression_string The input string representing the compression_type
+ *
+ * @return The compression_type enum value
+ */
+cudf::io::compression_type retrieve_compression_type_enum(std::string_view compression_string);
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
index 80303ea04af..6db147cbfef 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
@@ -56,15 +56,14 @@
   state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
 }
 
-template <data_type DataType, cudf::io::io_type IOType>
-void BM_parquet_read_data(
-  nvbench::state& state,
-  nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
+template <data_type DataType>
+void BM_parquet_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
 {
-  auto const d_type                 = get_type_or_group(static_cast<int32_t>(DataType));
-  cudf::size_type const cardinality = state.get_int64("cardinality");
-  cudf::size_type const run_length  = state.get_int64("run_length");
-  auto const compression            = cudf::io::compression_type::SNAPPY;
+  auto const d_type      = get_type_or_group(static_cast<int32_t>(DataType));
+  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
+  auto const run_length  = static_cast<cudf::size_type>(state.get_int64("run_length"));
+  auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
+  auto const compression = cudf::io::compression_type::SNAPPY;
 
   auto const tbl =
     create_random_table(cycle_dtypes(d_type, num_cols),
@@ -72,7 +71,7 @@
                         data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
   auto const view = tbl->view();
 
-  cuio_source_sink_pair source_sink(IOType);
+  cuio_source_sink_pair source_sink(source_type);
   cudf::io::parquet_writer_options write_opts =
     cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
       .compression(compression);
@@ -80,10 +79,7 @@
   parquet_read_common(write_opts, source_sink, state);
 }
 
-template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
-void BM_parquet_read_io_compression(
-  nvbench::state& state,
-  nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
+void BM_parquet_read_io_compression(nvbench::state& state)
 {
   auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
                                          static_cast<int32_t>(data_type::FLOAT),
@@ -94,10 +90,10 @@
                                          static_cast<int32_t>(data_type::LIST),
                                          static_cast<int32_t>(data_type::STRUCT)});
 
-  cudf::size_type const cardinality = state.get_int64("cardinality");
-  cudf::size_type const run_length  = state.get_int64("run_length");
-  auto const compression            = Compression;
-  auto const source_type            = IOType;
+  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
+  auto const run_length  = static_cast<cudf::size_type>(state.get_int64("run_length"));
+  auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
+  auto const compression = retrieve_compression_type_enum(state.get_string("compression_type"));
 
   auto const tbl =
     create_random_table(cycle_dtypes(d_type, num_cols),
@@ -113,17 +109,15 @@
   parquet_read_common(write_opts, source_sink, state);
 }
 
-template <cudf::io::io_type IOType>
-void BM_parquet_read_io_small_mixed(nvbench::state& state,
-                                    nvbench::type_list<nvbench::enum_type<IOType>>)
+void BM_parquet_read_io_small_mixed(nvbench::state& state)
 {
   auto const d_type =
     std::pair<cudf::type_id, cudf::type_id>{cudf::type_id::STRING, cudf::type_id::INT32};
 
-  cudf::size_type const cardinality = state.get_int64("cardinality");
-  cudf::size_type const run_length  = state.get_int64("run_length");
-  cudf::size_type const num_strings = state.get_int64("num_string_cols");
-  auto const source_type            = IOType;
+  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
+  auto const run_length  = static_cast<cudf::size_type>(state.get_int64("run_length"));
+  auto const num_strings = static_cast<cudf::size_type>(state.get_int64("num_string_cols"));
+  auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
 
   // want 80 pages total, across 4 columns, so 20 pages per column
   cudf::size_type constexpr n_col = 4;
@@ -145,16 +139,15 @@
   parquet_read_common(write_opts, source_sink, state);
 }
 
-template <data_type DataType, cudf::io::io_type IOType>
-void BM_parquet_read_chunks(
-  nvbench::state& state,
-  nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
+template <data_type DataType>
+void BM_parquet_read_chunks(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
 {
-  auto const d_type                 = get_type_or_group(static_cast<int32_t>(DataType));
-  cudf::size_type const cardinality = state.get_int64("cardinality");
-  cudf::size_type const run_length  = state.get_int64("run_length");
-  cudf::size_type const byte_limit  = state.get_int64("byte_limit");
-  auto const compression            = cudf::io::compression_type::SNAPPY;
+  auto const d_type      = get_type_or_group(static_cast<int32_t>(DataType));
+  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
+  auto const run_length  = static_cast<cudf::size_type>(state.get_int64("run_length"));
+  auto const byte_limit  = static_cast<cudf::size_type>(state.get_int64("byte_limit"));
+  auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
+  auto const compression = cudf::io::compression_type::SNAPPY;
 
   auto const tbl =
     create_random_table(cycle_dtypes(d_type, num_cols),
@@ -162,7 +155,7 @@
                         data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
   auto const view = tbl->view();
 
-  cuio_source_sink_pair source_sink(IOType);
+  cuio_source_sink_pair source_sink(source_type);
   cudf::io::parquet_writer_options write_opts =
     cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
       .compression(compression);
@@ -202,43 +195,33 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
                                             data_type::LIST,
                                             data_type::STRUCT>;
 
-using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
-                                        cudf::io::io_type::HOST_BUFFER,
-                                        cudf::io::io_type::DEVICE_BUFFER>;
-
-using compression_list =
-  nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;
-
-NVBENCH_BENCH_TYPES(BM_parquet_read_data,
-                    NVBENCH_TYPE_AXES(d_type_list,
-                                      nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
+NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list))
   .set_name("parquet_read_decode")
-  .set_type_axes_names({"data_type", "io"})
+  .set_type_axes_names({"data_type"})
+  .add_string_axis("io_type", {"DEVICE_BUFFER"})
   .set_min_samples(4)
   .add_int64_axis("cardinality", {0, 1000})
   .add_int64_axis("run_length", {1, 32});
 
-NVBENCH_BENCH_TYPES(BM_parquet_read_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list))
+NVBENCH_BENCH(BM_parquet_read_io_compression)
  .set_name("parquet_read_io_compression")
-  .set_type_axes_names({"io", "compression"})
+  .add_string_axis("io_type", {"FILEPATH", "HOST_BUFFER", "DEVICE_BUFFER"})
+  .add_string_axis("compression_type", {"SNAPPY", "NONE"})
  .set_min_samples(4)
  .add_int64_axis("cardinality", {0, 1000})
  .add_int64_axis("run_length", {1, 32});
 
-NVBENCH_BENCH_TYPES(BM_parquet_read_chunks,
-                    NVBENCH_TYPE_AXES(d_type_list,
-                                      nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
+NVBENCH_BENCH_TYPES(BM_parquet_read_chunks, NVBENCH_TYPE_AXES(d_type_list))
  .set_name("parquet_read_chunks")
-  .set_type_axes_names({"data_type", "io"})
+  .add_string_axis("io_type", {"DEVICE_BUFFER"})
  .set_min_samples(4)
  .add_int64_axis("cardinality", {0, 1000})
  .add_int64_axis("run_length", {1, 32})
.add_int64_axis("byte_limit", {0, 500'000}); -NVBENCH_BENCH_TYPES(BM_parquet_read_io_small_mixed, - NVBENCH_TYPE_AXES(nvbench::enum_type_list)) +NVBENCH_BENCH(BM_parquet_read_io_small_mixed) .set_name("parquet_read_io_small_mixed") - .set_type_axes_names({"io"}) + .add_string_axis("io_type", {"FILEPATH"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32})