Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move strings repeat benchmarks to nvbench #17304

Merged
merged 11 commits into from
Nov 20, 2024
3 changes: 2 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ ConfigureNVBench(

# ##################################################################################################
# * strings benchmark -------------------------------------------------------------------
ConfigureBench(STRINGS_BENCH string/factory.cu string/repeat_strings.cpp string/url_decode.cu)
ConfigureBench(STRINGS_BENCH string/factory.cu string/url_decode.cu)

ConfigureNVBench(
STRINGS_NVBENCH
Expand All @@ -378,6 +378,7 @@ ConfigureNVBench(
string/lengths.cpp
string/like.cpp
string/make_strings_column.cu
string/repeat_strings.cpp
string/replace.cpp
string/replace_re.cpp
string/reverse.cpp
Expand Down
123 changes: 41 additions & 82 deletions cpp/benchmarks/string/repeat_strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,99 +14,58 @@
* limitations under the License.
*/

#include "string_bench_args.hpp"

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/repeat_strings.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

// Repeat count passed to the scalar overload of cudf::strings::repeat_strings
// in BM_repeat_strings_scalar_times.
static constexpr cudf::size_type default_repeat_times = 16;
// Lower and upper bounds handed to the INT32 distribution that generates the
// per-row repeat-count column in create_data_table. The lower bound is
// negative, so generated repeat counts may be negative.
static constexpr cudf::size_type min_repeat_times = -16;
static constexpr cudf::size_type max_repeat_times = 16;
#include <nvbench/nvbench.cuh>

static std::unique_ptr<cudf::table> create_data_table(cudf::size_type n_cols,
cudf::size_type n_rows,
cudf::size_type max_str_length)
static void bench_repeat(nvbench::state& state)
{
CUDF_EXPECTS(n_cols == 1 || n_cols == 2, "Invalid number of columns.");
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));
auto const min_repeat = static_cast<cudf::size_type>(state.get_int64("min_repeat"));
auto const max_repeat = static_cast<cudf::size_type>(state.get_int64("max_repeat"));
auto const api = state.get_string("api");

std::vector<cudf::type_id> dtype_ids{cudf::type_id::STRING};
auto builder = data_profile_builder().distribution(
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);

if (n_cols == 2) {
dtype_ids.push_back(cudf::type_id::INT32);
builder.distribution(
cudf::type_id::INT32, distribution_id::NORMAL, min_repeat_times, max_repeat_times);
cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
builder.distribution(cudf::type_id::INT32, distribution_id::NORMAL, min_repeat, max_repeat);

auto const table = create_random_table(
{cudf::type_id::STRING, cudf::type_id::INT32}, row_count{num_rows}, data_profile{builder});
auto const input = cudf::strings_column_view(table->view().column(0));

auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
auto chars_size = input.chars_size(stream);
state.add_global_memory_reads<nvbench::int8_t>(chars_size);

if (api == "scalar") {
state.add_global_memory_writes<nvbench::int8_t>(chars_size * max_repeat);
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::repeat_strings(input, max_repeat); });
} else if (api == "column") {
auto repeats = table->view().column(1);
{
auto result = cudf::strings::repeat_strings(input, repeats);
auto output = cudf::strings_column_view(result->view());
state.add_global_memory_writes<nvbench::int8_t>(output.chars_size(stream));
}
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::repeat_strings(input, repeats); });
}

return create_random_table(dtype_ids, row_count{n_rows}, data_profile{builder});
}

/**
 * @brief Times cudf::strings::repeat_strings with one scalar repeat count for all rows.
 *
 * Benchmark argument 0 is the number of rows and argument 1 is the maximum
 * string length used when generating the single-column input table.
 *
 * @param state Google Benchmark state supplying the arguments and collecting results
 */
static void BM_repeat_strings_scalar_times(benchmark::State& state)
{
  auto const row_count  = static_cast<cudf::size_type>(state.range(0));
  auto const max_length = static_cast<cudf::size_type>(state.range(1));

  // Only the strings column is needed for the scalar overload.
  auto const input_table = create_data_table(1, row_count, max_length);
  auto const input       = cudf::strings_column_view(input_table->view().column(0));

  for ([[maybe_unused]] auto _ : state) {
    // The timer object lives for exactly one iteration.
    [[maybe_unused]] cuda_event_timer iteration_timer(state, true, cudf::get_default_stream());
    cudf::strings::repeat_strings(input, default_repeat_times);
  }

  // Report throughput in terms of the input character bytes.
  auto const input_char_bytes = input.chars_size(cudf::get_default_stream());
  state.SetBytesProcessed(state.iterations() * input_char_bytes);
}

/**
 * @brief Times cudf::strings::repeat_strings with a per-row repeat-count column.
 *
 * Benchmark argument 0 is the number of rows and argument 1 is the maximum
 * string length used when generating the two-column input table (strings in
 * column 0, repeat counts in column 1).
 *
 * @param state Google Benchmark state supplying the arguments and collecting results
 */
static void BM_repeat_strings_column_times(benchmark::State& state)
{
  auto const row_count  = static_cast<cudf::size_type>(state.range(0));
  auto const max_length = static_cast<cudf::size_type>(state.range(1));

  auto const input_table = create_data_table(2, row_count, max_length);
  auto const input       = cudf::strings_column_view(input_table->view().column(0));
  auto const repeats     = input_table->view().column(1);

  for ([[maybe_unused]] auto _ : state) {
    // The timer object lives for exactly one iteration.
    [[maybe_unused]] cuda_event_timer iteration_timer(state, true, cudf::get_default_stream());
    cudf::strings::repeat_strings(input, repeats);
  }

  // Bytes per iteration: the input characters plus one int32 repeat count per row.
  auto const input_char_bytes = input.chars_size(cudf::get_default_stream());
  auto const repeat_bytes     = repeats.size() * sizeof(int32_t);
  state.SetBytesProcessed(state.iterations() * (input_char_bytes + repeat_bytes));
}

/**
 * @brief Supplies the row-count and string-length arguments for the repeat benchmarks.
 *
 * Forwards fixed bounds and multipliers to generate_string_bench_args:
 * rows 2^8 to 2^18 and string lengths 2^4 to 2^8, each with a multiplier of 4.
 * NOTE(review): presumably the helper steps each range geometrically by its
 * multiplier; confirm against string_bench_args.hpp.
 *
 * @param b Benchmark registration object to receive the arguments
 */
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
  // Row-count range.
  constexpr int smallest_row_count = 1 << 8;
  constexpr int largest_row_count  = 1 << 18;
  constexpr int row_multiplier     = 4;

  // String-length range.
  constexpr int shortest_length   = 1 << 4;
  constexpr int longest_length    = 1 << 8;
  constexpr int length_multiplier = 4;

  generate_string_bench_args(b,
                             smallest_row_count,
                             largest_row_count,
                             row_multiplier,
                             shortest_length,
                             longest_length,
                             length_multiplier);
}

// Fixture type under which the repeat_strings benchmarks below are registered.
class RepeatStrings : public cudf::benchmark {};

// Defines a RepeatStrings fixture benchmark called `name` that runs
// BM_repeat_strings_scalar_times, then registers it with the arguments from
// generate_bench_args, manual timing, and millisecond units.
#define REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(RepeatStrings, name) \
(::benchmark::State & st) { BM_repeat_strings_scalar_times(st); } \
BENCHMARK_REGISTER_F(RepeatStrings, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

// Same as above, but the benchmark body runs BM_repeat_strings_column_times.
#define REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(RepeatStrings, name) \
(::benchmark::State & st) { BM_repeat_strings_column_times(st); } \
BENCHMARK_REGISTER_F(RepeatStrings, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

// Instantiate one benchmark per overload: scalar repeat count and column of repeat counts.
REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(scalar_times)
REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(column_times)
// Registers bench_repeat with nvbench under the name "repeat".
// The int64 axes supply the values bench_repeat reads through
// state.get_int64 (string width bounds, repeat-count bounds, row count);
// the "api" string axis selects between the scalar and column overloads of
// cudf::strings::repeat_strings.
NVBENCH_BENCH(bench_repeat)
.set_name("repeat")
.add_int64_axis("min_width", {0})
.add_int64_axis("max_width", {32, 64, 128, 256})
.add_int64_axis("min_repeat", {0})
.add_int64_axis("max_repeat", {16})
.add_int64_axis("num_rows", {32768, 262144, 2097152})
.add_string_axis("api", {"scalar", "column"});
Loading