Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable fractional null probability for hashing benchmark #13967

Merged
merged 9 commits into from
Aug 30, 2023
35 changes: 31 additions & 4 deletions cpp/benchmarks/hashing/hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,59 @@
#include <benchmarks/common/generate_input.hpp>

#include <cudf/hashing.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <nvbench/nvbench.cuh>

#include <optional>

static void bench_hash(nvbench::state& state)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const nulls = static_cast<bool>(state.get_float64("nulls"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const nulls = state.get_float64("nulls");
// disable null bitmask if probability is exactly 0.0
bool const no_nulls = nulls == 0.0;
auto const hash_name = state.get_string("hash_name");

data_profile const profile = data_profile_builder().null_probability(nulls);
auto const data = create_random_table(
data_profile const profile =
data_profile_builder().null_probability(no_nulls ? std::nullopt : std::optional<double>{nulls});
auto const data = create_random_table(
{cudf::type_id::INT64, cudf::type_id::STRING}, row_count{num_rows}, profile);

auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

// collect statistics
cudf::strings_column_view input(data->get_column(1).view());
auto const chars_size = input.chars_size();
// add memory read from string column
state.add_global_memory_reads<nvbench::int8_t>(chars_size);
// add memory read from int64_t column
state.add_global_memory_reads<nvbench::int64_t>(num_rows);
// add memory read from bitmaks
if (!no_nulls) {
state.add_global_memory_reads<nvbench::int8_t>(2 *
cudf::bitmask_allocation_size_bytes(num_rows));
}
// memory written depends on used hash

if (hash_name == "murmurhash3_x86_32") {
state.add_global_memory_writes<nvbench::uint32_t>(num_rows);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = cudf::hashing::murmurhash3_x86_32(data->view());
});
} else if (hash_name == "md5") {
// md5 writes out string with 32bytes
Blonck marked this conversation as resolved.
Show resolved Hide resolved
state.add_global_memory_writes<nvbench::int8_t>(32 * num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = cudf::hashing::md5(data->view()); });
} else if (hash_name == "spark_murmurhash3_x86_32") {
state.add_global_memory_writes<nvbench::int32_t>(num_rows);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = cudf::hashing::spark_murmurhash3_x86_32(data->view());
});
Expand Down