diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 95f0db895a8..daebf42f531 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -272,12 +272,11 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.c # ################################################################################################## # * nvtext benchmark ------------------------------------------------------------------- -ConfigureBench( - TEXT_BENCH text/ngrams.cpp text/normalize.cpp text/normalize_spaces.cpp text/replace.cpp - text/subword.cpp text/tokenize.cpp -) +ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp) -ConfigureNVBench(TEXT_NVBENCH text/minhash.cpp) +ConfigureNVBench( + TEXT_NVBENCH text/minhash.cpp text/normalize.cpp text/replace.cpp text/tokenize.cpp +) # ################################################################################################## # * strings benchmark ------------------------------------------------------------------- diff --git a/cpp/benchmarks/text/normalize.cpp b/cpp/benchmarks/text/normalize.cpp index 733f2da8b2a..6878fa4f8b6 100644 --- a/cpp/benchmarks/text/normalize.cpp +++ b/cpp/benchmarks/text/normalize.cpp @@ -16,7 +16,6 @@ #include #include -#include #include #include @@ -24,51 +23,45 @@ #include -class TextNormalize : public cudf::benchmark {}; +#include -static void BM_normalize(benchmark::State& state, bool to_lower) +// Benchmarks nvtext::normalize_spaces ("spaces") or nvtext::normalize_characters (any other "type"). +static void bench_normalize(nvbench::state& state) { - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const normalize_type = state.get_string("type"); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + return; // skip() only marks the state as skipped; stop before generating the input + } + data_profile const 
profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - nvtext::normalize_characters(input, to_lower); - } + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - state.SetBytesProcessed(state.iterations() * input.chars_size()); -} + auto chars_size = input.chars_size(); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size); -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 << 5; - int const max_rowlen = 1 << 13; - int const len_mult = 4; - for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) { - for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { - // avoid generating combinations that exceed the cudf column limit - size_t total_chars = static_cast(row_count) * rowlen * 4; - if (total_chars < static_cast(std::numeric_limits::max())) { - b->Args({row_count, rowlen}); - } - } + if (normalize_type == "spaces") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = nvtext::normalize_spaces(input); }); + } else { + bool const to_lower = (normalize_type == "to_lower"); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::normalize_characters(input, to_lower); + }); } } -#define NVTEXT_BENCHMARK_DEFINE(name, lower) \ - BENCHMARK_DEFINE_F(TextNormalize, name) \ - (::benchmark::State & st) { 
BM_normalize(st, lower); } \ - BENCHMARK_REGISTER_F(TextNormalize, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -NVTEXT_BENCHMARK_DEFINE(characters, false) -NVTEXT_BENCHMARK_DEFINE(to_lower, true) +NVBENCH_BENCH(bench_normalize) + .set_name("normalize") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_string_axis("type", {"spaces", "characters", "to_lower"}); diff --git a/cpp/benchmarks/text/normalize_spaces.cpp b/cpp/benchmarks/text/normalize_spaces.cpp deleted file mode 100644 index 82d9316e25b..00000000000 --- a/cpp/benchmarks/text/normalize_spaces.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -#include -#include -#include - -#include - -class TextNormalize : public cudf::benchmark {}; - -static void BM_normalize(benchmark::State& state) -{ - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); - cudf::strings_column_view input(column->view()); - - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - nvtext::normalize_spaces(input); - } - - state.SetBytesProcessed(state.iterations() * input.chars_size()); -} - -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 << 5; - int const max_rowlen = 1 << 13; - int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); -} - -#define NVTEXT_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(TextNormalize, name) \ - (::benchmark::State & st) { BM_normalize(st); } \ - BENCHMARK_REGISTER_F(TextNormalize, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -NVTEXT_BENCHMARK_DEFINE(spaces) diff --git a/cpp/benchmarks/text/replace.cpp b/cpp/benchmarks/text/replace.cpp index 21d69c4d40e..257f62aa728 100644 --- a/cpp/benchmarks/text/replace.cpp +++ b/cpp/benchmarks/text/replace.cpp @@ -15,8 +15,6 @@ */ #include -#include -#include #include @@ -24,14 +22,21 @@ #include -#include +#include -class TextReplace : public cudf::benchmark {}; +#include -static void BM_replace(benchmark::State& state) +// Benchmarks nvtext::replace_tokens on rows built from a fixed list of word tokens. +static void bench_replace(nvbench::state& state) { - auto const n_rows = static_cast(state.range(0)); - auto const n_length = 
static_cast(state.range(1)); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + return; // skip() only marks the state as skipped; stop before generating the input + } std::vector words{" ", "one ", "two ", "three ", "four ", "five ", "six ", "sevén ", "eight ", "nine ", @@ -41,46 +46,32 @@ static void BM_replace(benchmark::State& state) std::default_random_engine generator; std::uniform_int_distribution tokens_dist(0, words.size() - 1); std::string row; // build a row of random tokens - while (static_cast(row.size()) < n_length) + while (static_cast(row.size()) < row_width) row += words[tokens_dist(generator)]; std::uniform_int_distribution position_dist(0, 16); auto elements = cudf::detail::make_counting_transform_iterator( 0, [&](auto idx) { return row.c_str() + position_dist(generator); }); - cudf::test::strings_column_wrapper input(elements, elements + n_rows); + cudf::test::strings_column_wrapper input(elements, elements + num_rows); cudf::strings_column_view view(input); cudf::test::strings_column_wrapper targets({"one", "two", "sevén", "zero"}); cudf::test::strings_column_wrapper replacements({"1", "2", "7", "0"}); - for (auto _ : state) { - cuda_event_timer raii(state, true); - nvtext::replace_tokens( - view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements)); - } + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - state.SetBytesProcessed(state.iterations() * view.chars_size()); -} + auto chars_size = view.chars_size(); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size); -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_multiplier = 8; - int const min_row_length = 1 << 5; - int const 
max_row_length = 1 << 13; - int const length_multiplier = 4; - generate_string_bench_args( - b, min_rows, max_rows, row_multiplier, min_row_length, max_row_length, length_multiplier); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::replace_tokens( + view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements)); + }); } -#define NVTEXT_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(TextReplace, name) \ - (::benchmark::State & st) { BM_replace(st); } \ - BENCHMARK_REGISTER_F(TextReplace, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -NVTEXT_BENCHMARK_DEFINE(replace) +NVBENCH_BENCH(bench_replace) + .set_name("replace") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); diff --git a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp index bd80af08a74..423fe667b05 100644 --- a/cpp/benchmarks/text/tokenize.cpp +++ b/cpp/benchmarks/text/tokenize.cpp @@ -16,8 +16,6 @@ #include #include -#include -#include #include @@ -28,73 +26,59 @@ #include #include -class TextTokenize : public cudf::benchmark {}; +#include -enum class tokenize_type { single, multi, count, count_multi, ngrams, characters }; - -static void BM_tokenize(benchmark::State& state, tokenize_type tt) +// Benchmarks the nvtext tokenize, count_tokens, ngrams_tokenize and character_tokenize APIs, selected by "type". +static void bench_tokenize(nvbench::state& state) { - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const tokenize_type = state.get_string("type"); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + return; // skip() only marks the state as skipped; stop before generating the input + } + data_profile const profile = data_profile_builder().distribution( - 
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); - cudf::test::strings_column_wrapper delimiters({" ", "+", "-"}); - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - switch (tt) { - case tokenize_type::single: - // single whitespace delimiter - nvtext::tokenize(input); - break; - case tokenize_type::multi: - nvtext::tokenize(input, cudf::strings_column_view(delimiters)); - break; - case tokenize_type::count: - // single whitespace delimiter - nvtext::count_tokens(input); - break; - case tokenize_type::count_multi: - nvtext::count_tokens(input, cudf::strings_column_view(delimiters)); - break; - case tokenize_type::ngrams: - // default is bigrams - nvtext::ngrams_tokenize(input); - break; - case tokenize_type::characters: - // every character becomes a string - nvtext::character_tokenize(input); - break; - } - } + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - state.SetBytesProcessed(state.iterations() * input.chars_size()); -} + auto chars_size = input.chars_size(); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size); -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 << 5; - int const max_rowlen = 1 << 13; - int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); + if (tokenize_type == "whitespace") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = nvtext::tokenize(input); }); + } else if 
(tokenize_type == "multi") { + cudf::test::strings_column_wrapper delimiters({" ", "+", "-"}); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::tokenize(input, cudf::strings_column_view(delimiters)); + }); + } else if (tokenize_type == "count") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = nvtext::count_tokens(input); }); + } else if (tokenize_type == "count_multi") { + cudf::test::strings_column_wrapper delimiters({" ", "+", "-"}); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::count_tokens(input, cudf::strings_column_view(delimiters)); + }); + } else if (tokenize_type == "ngrams") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = nvtext::ngrams_tokenize(input); }); + } else if (tokenize_type == "characters") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = nvtext::character_tokenize(input); }); + } } -#define NVTEXT_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(TextTokenize, name) \ - (::benchmark::State & st) { BM_tokenize(st, tokenize_type::name); } \ - BENCHMARK_REGISTER_F(TextTokenize, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -NVTEXT_BENCHMARK_DEFINE(single) -NVTEXT_BENCHMARK_DEFINE(multi) -NVTEXT_BENCHMARK_DEFINE(count) -NVTEXT_BENCHMARK_DEFINE(count_multi) -NVTEXT_BENCHMARK_DEFINE(ngrams) -NVTEXT_BENCHMARK_DEFINE(characters) +NVBENCH_BENCH(bench_tokenize) + .set_name("tokenize") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_string_axis("type", {"whitespace", "multi", "count", "count_multi", "ngrams", "characters"});