Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move some nvtext benchmarks to nvbench #13368

Merged
merged 11 commits into from
May 24, 2023
Merged
9 changes: 4 additions & 5 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -272,12 +272,11 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.c

# ##################################################################################################
# * nvtext benchmark -------------------------------------------------------------------
# Remaining google-benchmark targets (not yet ported to nvbench)
ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp)

# nvbench-based targets: ported from google-benchmark in this change
ConfigureNVBench(
  TEXT_NVBENCH text/minhash.cpp text/normalize.cpp text/replace.cpp text/tokenize.cpp
)

# ##################################################################################################
# * strings benchmark -------------------------------------------------------------------
Expand Down
69 changes: 30 additions & 39 deletions cpp/benchmarks/text/normalize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,59 +16,50 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <nvtext/normalize.hpp>

#include <nvbench/nvbench.cuh>

/**
 * @brief Benchmark nvtext normalize APIs over a random strings column.
 *
 * Axes (see registration below): num_rows, row_width, and "type" selecting
 * normalize_spaces, normalize_characters, or normalize_characters(to_lower).
 */
static void bench_normalize(nvbench::state& state)
{
  auto const num_rows       = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const row_width      = static_cast<cudf::size_type>(state.get_int64("row_width"));
  auto const normalize_type = state.get_string("type");

  // A strings column's total character count must fit in size_type;
  // skip axis combinations that would overflow it.
  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
    state.skip("Skip benchmarks greater than size_type limit");
    return;  // do not build the oversized input after skipping
  }

  data_profile const profile = data_profile_builder().distribution(
    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
  auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
  cudf::strings_column_view input(column->view());

  // Time on cudf's default stream rather than nvbench's private stream.
  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));

  // Throughput model: the chars buffer is read once and rewritten once.
  auto chars_size = input.chars_size();
  state.add_global_memory_reads<nvbench::int8_t>(chars_size);
  state.add_global_memory_writes<nvbench::int8_t>(chars_size);

  if (normalize_type == "spaces") {
    state.exec(nvbench::exec_tag::sync,
               [&](nvbench::launch& launch) { auto result = nvtext::normalize_spaces(input); });
  } else {
    bool const to_lower = (normalize_type == "to_lower");
    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
      auto result = nvtext::normalize_characters(input, to_lower);
    });
  }
}

// Register the benchmark: rows x average-row-width x normalize variant.
NVBENCH_BENCH(bench_normalize)
  .set_name("normalize")
  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
  .add_string_axis("type", {"spaces", "characters", "to_lower"});
66 changes: 0 additions & 66 deletions cpp/benchmarks/text/normalize_spaces.cpp

This file was deleted.

59 changes: 24 additions & 35 deletions cpp/benchmarks/text/replace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,26 @@
*/

#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/string/string_bench_args.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <cudf/strings/strings_column_view.hpp>

#include <nvtext/replace.hpp>

#include <nvbench/nvbench.cuh>

#include <random>

static void BM_replace(benchmark::State& state)
static void bench_replace(nvbench::state& state)
{
auto const n_rows = static_cast<cudf::size_type>(state.range(0));
auto const n_length = static_cast<cudf::size_type>(state.range(1));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));

if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
state.skip("Skip benchmarks greater than size_type limit");
}

std::vector<std::string> words{" ", "one ", "two ", "three ", "four ",
"five ", "six ", "sevén ", "eight ", "nine ",
Expand All @@ -41,46 +44,32 @@ static void BM_replace(benchmark::State& state)
std::default_random_engine generator;
std::uniform_int_distribution<int> tokens_dist(0, words.size() - 1);
std::string row; // build a row of random tokens
while (static_cast<int>(row.size()) < n_length)
while (static_cast<cudf::size_type>(row.size()) < row_width)
row += words[tokens_dist(generator)];

std::uniform_int_distribution<int> position_dist(0, 16);

auto elements = cudf::detail::make_counting_transform_iterator(
0, [&](auto idx) { return row.c_str() + position_dist(generator); });
cudf::test::strings_column_wrapper input(elements, elements + n_rows);
cudf::test::strings_column_wrapper input(elements, elements + num_rows);
cudf::strings_column_view view(input);

cudf::test::strings_column_wrapper targets({"one", "two", "sevén", "zero"});
cudf::test::strings_column_wrapper replacements({"1", "2", "7", "0"});

for (auto _ : state) {
cuda_event_timer raii(state, true);
nvtext::replace_tokens(
view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements));
}
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));

state.SetBytesProcessed(state.iterations() * view.chars_size());
}
auto chars_size = view.chars_size();
state.add_global_memory_reads<nvbench::int8_t>(chars_size);
state.add_global_memory_writes<nvbench::int8_t>(chars_size);

static void generate_bench_args(benchmark::internal::Benchmark* b)
{
int const min_rows = 1 << 12;
int const max_rows = 1 << 24;
int const row_multiplier = 8;
int const min_row_length = 1 << 5;
int const max_row_length = 1 << 13;
int const length_multiplier = 4;
generate_string_bench_args(
b, min_rows, max_rows, row_multiplier, min_row_length, max_row_length, length_multiplier);
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = nvtext::replace_tokens(
view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements));
});
}

// Register the benchmark: rows x average-row-width.
NVBENCH_BENCH(bench_replace)
  .set_name("replace")
  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216});
108 changes: 45 additions & 63 deletions cpp/benchmarks/text/tokenize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/string/string_bench_args.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf_test/column_wrapper.hpp>

Expand All @@ -28,73 +26,57 @@
#include <nvtext/ngrams_tokenize.hpp>
#include <nvtext/tokenize.hpp>

#include <nvbench/nvbench.cuh>

/**
 * @brief Benchmark nvtext tokenize APIs over a random strings column.
 *
 * The "type" string axis selects which API to time: whitespace tokenize,
 * multi-delimiter tokenize, token counting (both variants), ngrams_tokenize,
 * or character_tokenize.
 */
static void bench_tokenize(nvbench::state& state)
{
  auto const num_rows      = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const row_width     = static_cast<cudf::size_type>(state.get_int64("row_width"));
  auto const tokenize_type = state.get_string("type");

  // A strings column's total character count must fit in size_type;
  // skip axis combinations that would overflow it.
  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
    state.skip("Skip benchmarks greater than size_type limit");
    return;  // do not build the oversized input after skipping
  }

  data_profile const profile = data_profile_builder().distribution(
    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
  auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
  cudf::strings_column_view input(column->view());

  // Time on cudf's default stream rather than nvbench's private stream.
  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));

  // Throughput model: the chars buffer is read once and rewritten once.
  auto chars_size = input.chars_size();
  state.add_global_memory_reads<nvbench::int8_t>(chars_size);
  state.add_global_memory_writes<nvbench::int8_t>(chars_size);

  if (tokenize_type == "whitespace") {
    // single whitespace delimiter
    state.exec(nvbench::exec_tag::sync,
               [&](nvbench::launch& launch) { auto result = nvtext::tokenize(input); });
  } else if (tokenize_type == "multi") {
    cudf::test::strings_column_wrapper delimiters({" ", "+", "-"});
    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
      auto result = nvtext::tokenize(input, cudf::strings_column_view(delimiters));
    });
  } else if (tokenize_type == "count") {
    // single whitespace delimiter
    state.exec(nvbench::exec_tag::sync,
               [&](nvbench::launch& launch) { auto result = nvtext::count_tokens(input); });
  } else if (tokenize_type == "count_multi") {
    cudf::test::strings_column_wrapper delimiters({" ", "+", "-"});
    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
      auto result = nvtext::count_tokens(input, cudf::strings_column_view(delimiters));
    });
  } else if (tokenize_type == "ngrams") {
    // default is bigrams
    state.exec(nvbench::exec_tag::sync,
               [&](nvbench::launch& launch) { auto result = nvtext::ngrams_tokenize(input); });
  } else if (tokenize_type == "characters") {
    // every character becomes a string
    state.exec(nvbench::exec_tag::sync,
               [&](nvbench::launch& launch) { auto result = nvtext::character_tokenize(input); });
  }
}

// Register the benchmark: rows x average-row-width x tokenize variant.
NVBENCH_BENCH(bench_tokenize)
  .set_name("tokenize")
  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
  .add_string_axis("type", {"whitespace", "multi", "count", "count_multi", "ngrams", "characters"});