From c73ff70dc5ad85d71a0719606c688c2447d55d85 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Thu, 31 Aug 2023 00:29:31 +0200 Subject: [PATCH] Enable fractional null probability for hashing benchmark (#13967) In the past, the HASHING_NVBENCH benchmark treated the `nulls` parameter as a boolean. Any value other than 0.0 resulted in a null probability of 1.0. Now, the `nulls` parameter directly determines the null probability. For instance, a value of 0.1 will generate 10% of the data as null. Moreover, setting nulls to 0.0 produces data without a null bitmask. Additionally, `bytes_per_second` is added to the benchmark. This patch relates to #13735. Authors: - Martin Marenz (https://github.com/Blonck) - Yunsong Wang (https://github.com/PointKernel) Approvers: - Yunsong Wang (https://github.com/PointKernel) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/13967 --- cpp/benchmarks/hashing/hash.cpp | 35 +++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp index f0e9202612e..e679b4b62d2 100644 --- a/cpp/benchmarks/hashing/hash.cpp +++ b/cpp/benchmarks/hashing/hash.cpp @@ -17,32 +17,59 @@ #include #include +#include #include #include #include +#include + static void bench_hash(nvbench::state& state) { - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const nulls = static_cast(state.get_float64("nulls")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const nulls = state.get_float64("nulls"); + // disable null bitmask if probability is exactly 0.0 + bool const no_nulls = nulls == 0.0; auto const hash_name = state.get_string("hash_name"); - data_profile const profile = data_profile_builder().null_probability(nulls); - auto const data = create_random_table( + data_profile const profile = + data_profile_builder().null_probability(no_nulls ? 
std::nullopt : std::optional{nulls}); + auto const data = create_random_table( {cudf::type_id::INT64, cudf::type_id::STRING}, row_count{num_rows}, profile); auto stream = cudf::get_default_stream(); state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + // collect statistics + cudf::strings_column_view input(data->get_column(1).view()); + auto const chars_size = input.chars_size(); + // add memory read from string column + state.add_global_memory_reads(chars_size); + // add memory read from int64_t column + state.add_global_memory_reads(num_rows); + // add memory read from bitmasks + if (!no_nulls) { + state.add_global_memory_reads(2 * + cudf::bitmask_allocation_size_bytes(num_rows)); + } + // memory written depends on used hash + if (hash_name == "murmurhash3_x86_32") { + state.add_global_memory_writes(num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::murmurhash3_x86_32(data->view()); }); } else if (hash_name == "md5") { + // md5 creates a 32-byte string + state.add_global_memory_writes(32 * num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::md5(data->view()); }); } else if (hash_name == "spark_murmurhash3_x86_32") { + state.add_global_memory_writes(num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::spark_murmurhash3_x86_32(data->view()); });