Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unified binary_ops and ast benchmarks parameter names #17200

Merged
merged 2 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions cpp/benchmarks/ast/transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ enum class TreeType {
template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
static void BM_ast_transform(nvbench::state& state)
{
auto const table_size = static_cast<cudf::size_type>(state.get_int64("table_size"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));

// Create table data
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table =
create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols),
row_count{table_size},
row_count{num_rows},
Nullable ? std::optional<double>{0.5} : std::nullopt);
auto table = source_table->view();

Expand Down Expand Up @@ -99,8 +99,8 @@ static void BM_ast_transform(nvbench::state& state)
auto const& expression_tree_root = expressions.back();

// Use the number of bytes read from and written to global memory
state.add_global_memory_reads<key_type>(static_cast<size_t>(table_size) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(table_size);
state.add_global_memory_reads<key_type>(static_cast<size_t>(num_rows) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
Expand All @@ -109,15 +109,15 @@ static void BM_ast_transform(nvbench::state& state)
template <cudf::ast::ast_operator cmp_op, cudf::ast::ast_operator reduce_op>
static void BM_string_compare_ast_transform(nvbench::state& state)
{
auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_comparisons = static_cast<cudf::size_type>(state.get_int64("num_comparisons"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));
auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));

CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons");
CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons");

// Create table data
auto const num_cols = num_comparisons * 2;
auto const num_cols = tree_levels * 2;
std::vector<std::unique_ptr<cudf::column>> columns;
std::for_each(
thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) {
Expand Down Expand Up @@ -150,7 +150,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1]));

std::for_each(thrust::make_counting_iterator(1),
thrust::make_counting_iterator(num_comparisons),
thrust::make_counting_iterator(tree_levels),
[&](size_t idx) {
auto const& lhs = expressions.back();
auto const& rhs = expressions.emplace_back(
Expand All @@ -177,7 +177,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
NVBENCH_BENCH(name) \
.set_name(#name) \
.add_int64_axis("tree_levels", {1, 5, 10}) \
.add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000})

AST_TRANSFORM_BENCHMARK_DEFINE(
ast_int32_imbalanced_unique, int32_t, TreeType::IMBALANCED_LEFT, false, false);
Expand All @@ -202,7 +202,7 @@ AST_TRANSFORM_BENCHMARK_DEFINE(
.set_name(#name) \
.add_int64_axis("string_width", {32, 64, 128, 256}) \
.add_int64_axis("num_rows", {32768, 262144, 2097152}) \
.add_int64_axis("num_comparisons", {1, 2, 3, 4}) \
.add_int64_axis("tree_levels", {1, 2, 3, 4}) \
.add_int64_axis("hit_rate", {50, 100})

AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(ast_string_equal_logical_and,
Expand Down
26 changes: 13 additions & 13 deletions cpp/benchmarks/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,18 @@ enum class TreeType {
template <typename key_type, TreeType tree_type, bool reuse_columns>
static void BM_binaryop_transform(nvbench::state& state)
{
auto const table_size{static_cast<cudf::size_type>(state.get_int64("table_size"))};
auto const num_rows{static_cast<cudf::size_type>(state.get_int64("num_rows"))};
auto const tree_levels{static_cast<cudf::size_type>(state.get_int64("tree_levels"))};

// Create table data
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table = create_sequence_table(
cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols), row_count{table_size});
cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols), row_count{num_rows});
cudf::table_view table{*source_table};

// Use the number of bytes read from and written to global memory
state.add_global_memory_reads<key_type>(static_cast<size_t>(table_size) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(table_size);
state.add_global_memory_reads<key_type>(static_cast<size_t>(num_rows) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(num_rows);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
// Execute tree that chains additions like (((a + b) + c) + d)
Expand All @@ -74,15 +74,15 @@ static void BM_binaryop_transform(nvbench::state& state)
template <cudf::binary_operator cmp_op, cudf::binary_operator reduce_op>
static void BM_string_compare_binaryop_transform(nvbench::state& state)
{
auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_comparisons = static_cast<cudf::size_type>(state.get_int64("num_comparisons"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));
auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));

CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons");
CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons");

// Create table data
auto const num_cols = num_comparisons * 2;
auto const num_cols = tree_levels * 2;
std::vector<std::unique_ptr<cudf::column>> columns;
std::for_each(
thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) {
Expand Down Expand Up @@ -113,7 +113,7 @@ static void BM_string_compare_binaryop_transform(nvbench::state& state)
cudf::binary_operation(table.get_column(0), table.get_column(1), cmp_op, bool_type, stream);
std::for_each(
thrust::make_counting_iterator(1),
thrust::make_counting_iterator(num_comparisons),
thrust::make_counting_iterator(tree_levels),
[&](size_t idx) {
std::unique_ptr<cudf::column> comparison = cudf::binary_operation(
table.get_column(idx * 2), table.get_column(idx * 2 + 1), cmp_op, bool_type, stream);
Expand All @@ -133,7 +133,7 @@ static void BM_string_compare_binaryop_transform(nvbench::state& state)
} \
NVBENCH_BENCH(name) \
.add_int64_axis("tree_levels", {1, 2, 5, 10}) \
.add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000})

BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_int32_imbalanced_unique,
int32_t,
Expand All @@ -158,7 +158,7 @@ BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique,
.set_name(#name) \
.add_int64_axis("string_width", {32, 64, 128, 256}) \
.add_int64_axis("num_rows", {32768, 262144, 2097152}) \
.add_int64_axis("num_comparisons", {1, 2, 3, 4}) \
.add_int64_axis("tree_levels", {1, 2, 3, 4}) \
.add_int64_axis("hit_rate", {50, 100})

STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(string_compare_binaryop_transform,
Expand Down
12 changes: 6 additions & 6 deletions cpp/benchmarks/binaryop/compiled_binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
template <typename TypeLhs, typename TypeRhs, typename TypeOut>
void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop)
{
auto const table_size = static_cast<cudf::size_type>(state.get_int64("table_size"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));

auto const source_table = create_random_table(
{cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{table_size});
{cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{num_rows});

auto lhs = cudf::column_view(source_table->get_column(0));
auto rhs = cudf::column_view(source_table->get_column(1));
Expand All @@ -37,9 +37,9 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop)
cudf::binary_operation(lhs, rhs, binop, output_dtype);

// use number of bytes read and written to global memory
state.add_global_memory_reads<TypeLhs>(table_size);
state.add_global_memory_reads<TypeRhs>(table_size);
state.add_global_memory_writes<TypeOut>(table_size);
state.add_global_memory_reads<TypeLhs>(num_rows);
state.add_global_memory_reads<TypeRhs>(num_rows);
state.add_global_memory_writes<TypeOut>(num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::binary_operation(lhs, rhs, binop, output_dtype); });
Expand All @@ -55,7 +55,7 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop)
} \
NVBENCH_BENCH(name) \
.set_name("compiled_binary_op_" BM_STRINGIFY(name)) \
.add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000})

#define build_name(a, b, c, d) a##_##b##_##c##_##d

Expand Down
Loading