Skip to content

Commit

Permalink
Move strings repeat benchmarks to nvbench (#17304)
Browse files Browse the repository at this point in the history
Moves the `cpp/benchmarks/string/repeat_strings.cpp` implementation from google-bench to nvbench.
This covers the overloads of the `cudf::strings::repeat_strings` API.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Yunsong Wang (https://github.com/PointKernel)

URL: #17304
  • Loading branch information
davidwendt authored Nov 20, 2024
1 parent 332cc06 commit d927992
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 83 deletions.
3 changes: 2 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ ConfigureNVBench(

# ##################################################################################################
# * strings benchmark -------------------------------------------------------------------
ConfigureBench(STRINGS_BENCH string/factory.cu string/repeat_strings.cpp)
ConfigureBench(STRINGS_BENCH string/factory.cu)

ConfigureNVBench(
STRINGS_NVBENCH
Expand All @@ -384,6 +384,7 @@ ConfigureNVBench(
string/lengths.cpp
string/like.cpp
string/make_strings_column.cu
string/repeat_strings.cpp
string/replace.cpp
string/replace_re.cpp
string/reverse.cpp
Expand Down
123 changes: 41 additions & 82 deletions cpp/benchmarks/string/repeat_strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,99 +14,58 @@
* limitations under the License.
*/

#include "string_bench_args.hpp"

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/repeat_strings.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

// Repeat count passed to the scalar-count overload in BM_repeat_strings_scalar_times.
static constexpr cudf::size_type default_repeat_times = 16;
// Lower and upper bounds handed to the INT32 (repeat-times column) distribution in
// create_data_table; note that the lower bound is negative.
static constexpr cudf::size_type min_repeat_times = -16;
static constexpr cudf::size_type max_repeat_times = 16;
#include <nvbench/nvbench.cuh>

// NOTE(review): this span appears to interleave the removed google-benchmark helper
// `create_data_table` with the added nvbench function `bench_repeat` (the diff's +/- markers
// are not visible here), so it does not read as one compilable definition. The comments below
// describe each visible statement on its own terms.
static std::unique_ptr<cudf::table> create_data_table(cudf::size_type n_cols,
cudf::size_type n_rows,
cudf::size_type max_str_length)
static void bench_repeat(nvbench::state& state)
{
// Only a one-column (strings) or two-column (strings + INT32) table is accepted.
CUDF_EXPECTS(n_cols == 1 || n_cols == 2, "Invalid number of columns.");
// Benchmark parameters are read back from the named nvbench axes.
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));
auto const min_repeat = static_cast<cudf::size_type>(state.get_int64("min_repeat"));
auto const max_repeat = static_cast<cudf::size_type>(state.get_int64("max_repeat"));
auto const api = state.get_string("api");

std::vector<cudf::type_id> dtype_ids{cudf::type_id::STRING};
// Data profile for the STRING column: NORMAL distribution with the given bounds
// (presumably bounds on string length -- confirm against data_profile_builder).
auto builder = data_profile_builder().distribution(
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);

if (n_cols == 2) {
dtype_ids.push_back(cudf::type_id::INT32);
builder.distribution(
cudf::type_id::INT32, distribution_id::NORMAL, min_repeat_times, max_repeat_times);
cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
// Data profile for the INT32 column that is later used as the per-row repeat counts.
builder.distribution(cudf::type_id::INT32, distribution_id::NORMAL, min_repeat, max_repeat);

// Column 0 is the strings input; column 1 is the INT32 column.
auto const table = create_random_table(
{cudf::type_id::STRING, cudf::type_id::INT32}, row_count{num_rows}, data_profile{builder});
auto const input = cudf::strings_column_view(table->view().column(0));

// Hand cudf's default stream to nvbench and report the input character bytes as reads.
auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
auto chars_size = input.chars_size(stream);
state.add_global_memory_reads<nvbench::int8_t>(chars_size);

if (api == "scalar") {
// max_repeat is passed as the single repeat count, so the bytes written are
// reported as chars_size * max_repeat.
state.add_global_memory_writes<nvbench::int8_t>(chars_size * max_repeat);
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::repeat_strings(input, max_repeat); });
} else if (api == "column") {
auto repeats = table->view().column(1);
{
// Extra call outside state.exec, used only to obtain the output character size
// that is reported as bytes written; the result is released at the end of this scope.
auto result = cudf::strings::repeat_strings(input, repeats);
auto output = cudf::strings_column_view(result->view());
state.add_global_memory_writes<nvbench::int8_t>(output.chars_size(stream));
}
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::repeat_strings(input, repeats); });
}

return create_random_table(dtype_ids, row_count{n_rows}, data_profile{builder});
}

/**
 * @brief Google-benchmark body for `cudf::strings::repeat_strings` with a scalar repeat count.
 *
 * A one-column strings table is built by `create_data_table` from `state.range(0)` (row count)
 * and `state.range(1)` (maximum string length argument). Each iteration calls
 * `repeat_strings` with `default_repeat_times` while a `cuda_event_timer` is alive.
 * Bytes processed is reported as iterations times the input's character bytes.
 *
 * @param state Benchmark state that supplies the two range arguments and drives the loop
 */
static void BM_repeat_strings_scalar_times(benchmark::State& state)
{
  auto const num_rows   = static_cast<cudf::size_type>(state.range(0));
  auto const max_length = static_cast<cudf::size_type>(state.range(1));
  auto const data       = create_data_table(1, num_rows, max_length);
  auto const input      = cudf::strings_column_view(data->view().column(0));

  for ([[maybe_unused]] auto _ : state) {
    // The timer object lives for exactly one iteration (presumably it records the
    // elapsed time when it goes out of scope -- confirm against cuda_event_timer).
    [[maybe_unused]] cuda_event_timer timer(state, true, cudf::get_default_stream());
    cudf::strings::repeat_strings(input, default_repeat_times);
  }

  auto const input_bytes = input.chars_size(cudf::get_default_stream());
  state.SetBytesProcessed(state.iterations() * input_bytes);
}

/**
 * @brief Google-benchmark body for `cudf::strings::repeat_strings` with per-row repeat counts.
 *
 * A two-column table is built by `create_data_table` from `state.range(0)` (row count) and
 * `state.range(1)` (maximum string length argument): column 0 is the strings input and
 * column 1 supplies the repeat counts. Bytes processed is reported as iterations times the
 * sum of the input character bytes and the repeat-count column bytes (`sizeof(int32_t)` each).
 *
 * @param state Benchmark state that supplies the two range arguments and drives the loop
 */
static void BM_repeat_strings_column_times(benchmark::State& state)
{
  auto const num_rows   = static_cast<cudf::size_type>(state.range(0));
  auto const max_length = static_cast<cudf::size_type>(state.range(1));
  auto const data       = create_data_table(2, num_rows, max_length);
  auto const input      = cudf::strings_column_view(data->view().column(0));
  auto const repeats    = data->view().column(1);

  for ([[maybe_unused]] auto _ : state) {
    // The timer object lives for exactly one iteration (presumably it records the
    // elapsed time when it goes out of scope -- confirm against cuda_event_timer).
    [[maybe_unused]] cuda_event_timer timer(state, true, cudf::get_default_stream());
    cudf::strings::repeat_strings(input, repeats);
  }

  auto const bytes_per_iteration =
    input.chars_size(cudf::get_default_stream()) + repeats.size() * sizeof(int32_t);
  state.SetBytesProcessed(state.iterations() * bytes_per_iteration);
}

/**
 * @brief Supplies the row-count and string-length argument ranges for a benchmark.
 *
 * Forwards fixed bounds to `generate_string_bench_args`: rows from 2^8 to 2^18 and string
 * lengths from 2^4 to 2^8, each with a factor of 4 (presumably the geometric step between
 * generated values -- confirm against `generate_string_bench_args`).
 *
 * @param b Benchmark registration object the arguments are applied to
 */
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
  constexpr int rows_lo   = 1 << 8;
  constexpr int rows_hi   = 1 << 18;
  constexpr int rows_step = 4;

  constexpr int len_lo   = 1 << 4;
  constexpr int len_hi   = 1 << 8;
  constexpr int len_step = 4;

  generate_string_bench_args(b, rows_lo, rows_hi, rows_step, len_lo, len_hi, len_step);
}

// Empty fixture type derived from cudf::benchmark; it adds no members of its own and serves
// as the fixture name for the google-benchmark registrations in this file.
class RepeatStrings : public cudf::benchmark {};

// Defines a RepeatStrings fixture benchmark called `name` whose body forwards to
// BM_repeat_strings_scalar_times, then registers it with the arguments from
// generate_bench_args, manual timing, and millisecond units.
#define REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(RepeatStrings, name) \
(::benchmark::State & st) { BM_repeat_strings_scalar_times(st); } \
BENCHMARK_REGISTER_F(RepeatStrings, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

// Defines a RepeatStrings fixture benchmark called `name` whose body forwards to
// BM_repeat_strings_column_times, then registers it with the arguments from
// generate_bench_args, manual timing, and millisecond units.
#define REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(RepeatStrings, name) \
(::benchmark::State & st) { BM_repeat_strings_column_times(st); } \
BENCHMARK_REGISTER_F(RepeatStrings, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

// Instantiate and register the two google-benchmark cases: `scalar_times` (scalar repeat
// count) and `column_times` (per-row repeat counts).
REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(scalar_times)
REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(column_times)
// Registers bench_repeat with nvbench under the name "repeat". The axis names declared here
// are the ones bench_repeat reads back through state.get_int64 / state.get_string; the "api"
// axis selects between the scalar-count and column-count code paths.
NVBENCH_BENCH(bench_repeat)
.set_name("repeat")
.add_int64_axis("min_width", {0})
.add_int64_axis("max_width", {32, 64, 128, 256})
.add_int64_axis("min_repeat", {0})
.add_int64_axis("max_repeat", {16})
.add_int64_axis("num_rows", {32768, 262144, 2097152})
.add_string_axis("api", {"scalar", "column"});

0 comments on commit d927992

Please sign in to comment.