From 52d7e638af366a2384868c41a7ece889d7ada30e Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Tue, 29 Oct 2024 19:59:13 +0000 Subject: [PATCH] Unified binary_ops and ast benchmarks parameter names (#17200) This merge request unifies the parameter names of the AST and BINARYOP benchmark suites and makes it easier to perform parameter sweeps and compare the outputs of both benchmarks. Authors: - Basit Ayantunde (https://github.com/lamarrr) Approvers: - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/17200 --- cpp/benchmarks/ast/transform.cpp | 26 +++++++++---------- cpp/benchmarks/binaryop/binaryop.cpp | 26 +++++++++---------- cpp/benchmarks/binaryop/compiled_binaryop.cpp | 12 ++++----- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/cpp/benchmarks/ast/transform.cpp b/cpp/benchmarks/ast/transform.cpp index 7fe61054a26..2533ea9611c 100644 --- a/cpp/benchmarks/ast/transform.cpp +++ b/cpp/benchmarks/ast/transform.cpp @@ -52,14 +52,14 @@ enum class TreeType { template static void BM_ast_transform(nvbench::state& state) { - auto const table_size = static_cast(state.get_int64("table_size")); + auto const num_rows = static_cast(state.get_int64("num_rows")); auto const tree_levels = static_cast(state.get_int64("tree_levels")); // Create table data auto const n_cols = reuse_columns ? 1 : tree_levels + 1; auto const source_table = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, n_cols), - row_count{table_size}, + row_count{num_rows}, Nullable ? std::optional{0.5} : std::nullopt); auto table = source_table->view(); @@ -99,8 +99,8 @@ static void BM_ast_transform(nvbench::state& state) auto const& expression_tree_root = expressions.back(); // Use the number of bytes read from global memory - state.add_global_memory_reads(static_cast(table_size) * (tree_levels + 1)); - state.add_global_memory_writes(table_size); + state.add_global_memory_reads(static_cast(num_rows) * (tree_levels + 1)); + state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); }); @@ -109,15 +109,15 @@ static void BM_ast_transform(nvbench::state& state) template static void BM_string_compare_ast_transform(nvbench::state& state) { - auto const string_width = static_cast(state.get_int64("string_width")); - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const num_comparisons = static_cast(state.get_int64("num_comparisons")); - auto const hit_rate = static_cast(state.get_int64("hit_rate")); + auto const string_width = static_cast(state.get_int64("string_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const tree_levels = static_cast(state.get_int64("tree_levels")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); - CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons"); + CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons"); // Create table data - auto const num_cols = num_comparisons * 2; + auto const num_cols = tree_levels * 2; std::vector> columns; std::for_each( thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) { @@ -150,7 +150,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state) expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1])); std::for_each(thrust::make_counting_iterator(1), - thrust::make_counting_iterator(num_comparisons), + thrust::make_counting_iterator(tree_levels), [&](size_t idx) { auto const& lhs = expressions.back(); auto const& rhs = expressions.emplace_back( @@ -177,7 +177,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state) NVBENCH_BENCH(name) \ .set_name(#name) \ .add_int64_axis("tree_levels", {1, 5, 10}) \ - .add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000}) + .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000}) AST_TRANSFORM_BENCHMARK_DEFINE( ast_int32_imbalanced_unique, int32_t, TreeType::IMBALANCED_LEFT, false, false); @@ -202,7 +202,7 @@ AST_TRANSFORM_BENCHMARK_DEFINE( .set_name(#name) \ .add_int64_axis("string_width", {32, 64, 128, 256}) \ .add_int64_axis("num_rows", {32768, 262144, 2097152}) \ - .add_int64_axis("num_comparisons", {1, 2, 3, 4}) \ + .add_int64_axis("tree_levels", {1, 2, 3, 4}) \ .add_int64_axis("hit_rate", {50, 100}) AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(ast_string_equal_logical_and, diff --git a/cpp/benchmarks/binaryop/binaryop.cpp b/cpp/benchmarks/binaryop/binaryop.cpp index 35e41c6c2a4..75c91d270a7 100644 --- a/cpp/benchmarks/binaryop/binaryop.cpp +++ b/cpp/benchmarks/binaryop/binaryop.cpp @@ -40,18 +40,18 @@ enum class TreeType { template static void BM_binaryop_transform(nvbench::state& state) { - auto const table_size{static_cast(state.get_int64("table_size"))}; + auto const num_rows{static_cast(state.get_int64("num_rows"))}; auto const tree_levels{static_cast(state.get_int64("tree_levels"))}; // Create table data auto const n_cols = reuse_columns ? 1 : tree_levels + 1; auto const source_table = create_sequence_table( - cycle_dtypes({cudf::type_to_id()}, n_cols), row_count{table_size}); + cycle_dtypes({cudf::type_to_id()}, n_cols), row_count{num_rows}); cudf::table_view table{*source_table}; // Use the number of bytes read from global memory - state.add_global_memory_reads(static_cast(table_size) * (tree_levels + 1)); - state.add_global_memory_writes(table_size); + state.add_global_memory_reads(static_cast(num_rows) * (tree_levels + 1)); + state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { // Execute tree that chains additions like (((a + b) + c) + d) @@ -74,15 +74,15 @@ static void BM_binaryop_transform(nvbench::state& state) template static void BM_string_compare_binaryop_transform(nvbench::state& state) { - auto const string_width = static_cast(state.get_int64("string_width")); - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const num_comparisons = static_cast(state.get_int64("num_comparisons")); - auto const hit_rate = static_cast(state.get_int64("hit_rate")); + auto const string_width = static_cast(state.get_int64("string_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const tree_levels = static_cast(state.get_int64("tree_levels")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); - CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons"); + CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons"); // Create table data - auto const num_cols = num_comparisons * 2; + auto const num_cols = tree_levels * 2; std::vector> columns; std::for_each( thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) { @@ -113,7 +113,7 @@ static void BM_string_compare_binaryop_transform(nvbench::state& state) cudf::binary_operation(table.get_column(0), table.get_column(1), cmp_op, bool_type, stream); std::for_each( thrust::make_counting_iterator(1), - thrust::make_counting_iterator(num_comparisons), + thrust::make_counting_iterator(tree_levels), [&](size_t idx) { std::unique_ptr comparison = cudf::binary_operation( table.get_column(idx * 2), table.get_column(idx * 2 + 1), cmp_op, bool_type, stream); @@ -133,7 +133,7 @@ static void BM_string_compare_binaryop_transform(nvbench::state& state) } \ NVBENCH_BENCH(name) \ .add_int64_axis("tree_levels", {1, 2, 5, 10}) \ - .add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000}) + .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000}) BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_int32_imbalanced_unique, int32_t, @@ -158,7 +158,7 @@ BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique, .set_name(#name) \ .add_int64_axis("string_width", {32, 64, 128, 256}) \ .add_int64_axis("num_rows", {32768, 262144, 2097152}) \ - .add_int64_axis("num_comparisons", {1, 2, 3, 4}) \ + .add_int64_axis("tree_levels", {1, 2, 3, 4}) \ .add_int64_axis("hit_rate", {50, 100}) STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(string_compare_binaryop_transform, diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp index cd3c3871a2e..426f44a4fa1 100644 --- a/cpp/benchmarks/binaryop/compiled_binaryop.cpp +++ b/cpp/benchmarks/binaryop/compiled_binaryop.cpp @@ -23,10 +23,10 @@ template void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) { - auto const table_size = static_cast(state.get_int64("table_size")); + auto const num_rows = static_cast(state.get_int64("num_rows")); auto const source_table = create_random_table( - {cudf::type_to_id(), cudf::type_to_id()}, row_count{table_size}); + {cudf::type_to_id(), cudf::type_to_id()}, row_count{num_rows}); auto lhs = cudf::column_view(source_table->get_column(0)); auto rhs = cudf::column_view(source_table->get_column(1)); @@ -37,9 +37,9 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) cudf::binary_operation(lhs, rhs, binop, output_dtype); // use number of bytes read and written to global memory - state.add_global_memory_reads(table_size); - state.add_global_memory_reads(table_size); - state.add_global_memory_writes(table_size); + state.add_global_memory_reads(num_rows); + state.add_global_memory_reads(num_rows); + state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { cudf::binary_operation(lhs, rhs, binop, output_dtype); }); @@ -55,7 +55,7 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) } \ NVBENCH_BENCH(name) \ .set_name("compiled_binary_op_" BM_STRINGIFY(name)) \ - .add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000}) + .add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000}) #define build_name(a, b, c, d) a##_##b##_##c##_##d