rapidsai · rapids-bot · Dec 13, 2024 · Dec 11, 2024
@@ -351,11 +351,18 @@ ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binary
 
 # ##################################################################################################
 # * nvtext benchmark -------------------------------------------------------------------
-ConfigureBench(TEXT_BENCH text/subword.cpp)
-
 ConfigureNVBench(
-  TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp
-  text/ngrams.cpp text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp
+  TEXT_NVBENCH
+  text/edit_distance.cpp
+  text/hash_ngrams.cpp
+  text/jaccard.cpp
+  text/minhash.cpp
+  text/ngrams.cpp
+  text/normalize.cpp
+  text/replace.cpp
+  text/subword.cpp
+  text/tokenize.cpp
+  text/vocab.cpp
 )
 
 # ##################################################################################################

@@ -27,15 +27,11 @@
 static void bench_edit_distance(nvbench::state& state)
 {
   auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
-  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
-
-  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
-      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
-    state.skip("Skip benchmarks greater than size_type limit");
-  }
+  auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
+  auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));
 
   data_profile const strings_profile = data_profile_builder().distribution(
-    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+    cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
   auto const strings_table = create_random_table(
     {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, strings_profile);
   cudf::strings_column_view input1(strings_table->view().column(0));
@@ -55,5 +51,6 @@ static void bench_edit_distance(nvbench::state& state)
 
 NVBENCH_BENCH(bench_edit_distance)
   .set_name("edit_distance")
-  .add_int64_axis("num_rows", {1024, 4096, 8192, 16364, 32768, 262144})
-  .add_int64_axis("row_width", {8, 16, 32, 64, 128, 256});
+  .add_int64_axis("min_width", {0})
+  .add_int64_axis("max_width", {32, 64, 128, 256})
+  .add_int64_axis("num_rows", {32768, 262144});
@@ -27,16 +27,12 @@
 static void bench_hash_ngrams(nvbench::state& state)
 {
   auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
-  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
+  auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
+  auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));
   auto const ngrams    = static_cast<cudf::size_type>(state.get_int64("ngrams"));
 
-  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
-      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
-    state.skip("Skip benchmarks greater than size_type limit");
-  }
-
   data_profile const strings_profile = data_profile_builder().distribution(
-    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+    cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
   auto const strings_table =
     create_random_table({cudf::type_id::STRING}, row_count{num_rows}, strings_profile);
   cudf::strings_column_view input(strings_table->view().column(0));
@@ -55,6 +51,7 @@ static void bench_hash_ngrams(nvbench::state& state)
 
 NVBENCH_BENCH(bench_hash_ngrams)
   .set_name("hash_ngrams")
-  .add_int64_axis("num_rows", {1024, 4096, 8192, 16364, 32768, 262144})
-  .add_int64_axis("row_width", {128, 512, 2048})
+  .add_int64_axis("min_width", {0})
+  .add_int64_axis("max_width", {128, 512, 2048})
+  .add_int64_axis("num_rows", {16384, 32768, 262144})
   .add_int64_axis("ngrams", {5, 10});
@@ -28,17 +28,13 @@
 static void bench_jaccard(nvbench::state& state)
 {
   auto const num_rows        = static_cast<cudf::size_type>(state.get_int64("num_rows"));
-  auto const row_width       = static_cast<cudf::size_type>(state.get_int64("row_width"));
+  auto const min_width       = static_cast<cudf::size_type>(state.get_int64("min_width"));
+  auto const max_width       = static_cast<cudf::size_type>(state.get_int64("max_width"));
   auto const substring_width = static_cast<cudf::size_type>(state.get_int64("substring_width"));
 
-  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
-      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
-    state.skip("Skip benchmarks greater than size_type limit");
-  }
-
   data_profile const strings_profile =
     data_profile_builder()
-      .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width)
+      .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width)
       .no_validity();
   auto const input_table = create_random_table(
     {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, strings_profile);
@@ -59,6 +55,7 @@ static void bench_jaccard(nvbench::state& state)
 
 NVBENCH_BENCH(bench_jaccard)
   .set_name("jaccard")
+  .add_int64_axis("min_width", {0})
+  .add_int64_axis("max_width", {128, 512, 1024, 2048})
   .add_int64_axis("num_rows", {32768, 131072, 262144})
-  .add_int64_axis("row_width", {128, 512, 1024, 2048})
   .add_int64_axis("substring_width", {5, 10});
@@ -28,16 +28,12 @@
 static void bench_normalize(nvbench::state& state)
 {
   auto const num_rows       = static_cast<cudf::size_type>(state.get_int64("num_rows"));
-  auto const row_width      = static_cast<cudf::size_type>(state.get_int64("row_width"));
+  auto const min_width      = static_cast<cudf::size_type>(state.get_int64("min_width"));
+  auto const max_width      = static_cast<cudf::size_type>(state.get_int64("max_width"));
   auto const normalize_type = state.get_string("type");
 
-  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
-      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
-    state.skip("Skip benchmarks greater than size_type limit");
-  }
-
   data_profile const profile = data_profile_builder().distribution(
-    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+    cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
   auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
   cudf::strings_column_view input(column->view());
 
@@ -60,6 +56,7 @@ static void bench_normalize(nvbench::state& state)
 
 NVBENCH_BENCH(bench_normalize)
   .set_name("normalize")
-  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
-  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
+  .add_int64_axis("min_width", {0})
+  .add_int64_axis("max_width", {32, 64, 128, 256})
+  .add_int64_axis("num_rows", {32768, 262144, 2097152})
   .add_string_axis("type", {"spaces", "characters", "to_lower"});
@@ -31,11 +31,6 @@ static void bench_replace(nvbench::state& state)
   auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
   auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
 
-  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
-      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
-    state.skip("Skip benchmarks greater than size_type limit");
-  }
-
   std::vector<std::string> words{" ",        "one  ",    "two ",       "three ",     "four ",
                                  "five ",    "six  ",    "sevén  ",    "eight ",     "nine ",
                                  "ten   ",   "eleven ",  "twelve ",    "thirteen  ", "fourteen ",
@@ -71,5 +66,5 @@ static void bench_replace(nvbench::state& state)
 
 NVBENCH_BENCH(bench_replace)
   .set_name("replace")
-  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
-  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216});
+  .add_int64_axis("row_width", {32, 64, 128, 256})
+  .add_int64_axis("num_rows", {32768, 262144, 2097152});
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,15 @@
  * limitations under the License.
  */
 
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
-
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/file_utilities.hpp>
 
 #include <cudf/strings/strings_column_view.hpp>
 
 #include <nvtext/subword_tokenize.hpp>
 
+#include <nvbench/nvbench.cuh>
+
 #include <filesystem>
 #include <fstream>
 #include <iostream>
@@ -54,40 +53,38 @@ static std::string create_hash_vocab_file()
   return hash_file;
 }
 
-static void BM_subword_tokenizer(benchmark::State& state)
+// Benchmarks nvtext::subword_tokenize on `num_rows` copies of the string "This is a test ".
+// Reports the input's character bytes as global-memory reads and `num_rows * max_sequence`
+// 32-bit elements as global-memory writes.
+static void bench_subword_tokenizer(nvbench::state& state)
 {
-  auto const nrows = static_cast<cudf::size_type>(state.range(0));
-  std::vector<char const*> h_strings(nrows, "This is a test ");
+  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+
+  std::vector<char const*> h_strings(num_rows, "This is a test ");
   cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end());
+  // Function-local static: the vocabulary hash file is created only on the first invocation.
   static std::string hash_file = create_hash_vocab_file();
-  std::vector<uint32_t> offsets{14};
-  uint32_t max_sequence_length = 64;
-  uint32_t stride              = 48;
-  uint32_t do_truncate         = 0;
-  uint32_t do_lower            = 1;
-  //
-  auto vocab = nvtext::load_vocabulary_file(hash_file);
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    auto result = nvtext::subword_tokenize(cudf::strings_column_view{strings},
-                                           *vocab,
-                                           max_sequence_length,
-                                           stride,
-                                           do_lower,
-                                           do_truncate);
-  }
-}
+  uint32_t max_sequence = 64;
+  uint32_t stride       = 48;
+  uint32_t do_truncate  = 0;
+  uint32_t do_lower     = 1;
 
-class Subword : public cudf::benchmark {};
+  auto input = cudf::strings_column_view{strings};
 
-#define SUBWORD_BM_BENCHMARK_DEFINE(name)                                                        \
-  BENCHMARK_DEFINE_F(Subword, name)(::benchmark::State & state) { BM_subword_tokenizer(state); } \
-  BENCHMARK_REGISTER_F(Subword, name)                                                            \
-    ->RangeMultiplier(2)                                                                         \
-    ->Range(1 << 10, 1 << 17)                                                                    \
-    ->UseManualTime()                                                                            \
-    ->Unit(benchmark::kMillisecond);
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  auto chars_size = input.chars_size(cudf::get_default_stream());
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);
+  // Widen to std::size_t before multiplying so a large `num_rows` cannot wrap in 32 bits.
+  auto const output_elements = static_cast<std::size_t>(num_rows) * max_sequence;
+  state.add_global_memory_writes<nvbench::int32_t>(output_elements);
 
-SUBWORD_BM_BENCHMARK_DEFINE(BM_subword_tokenizer);
+  auto vocab = nvtext::load_vocabulary_file(hash_file);
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    auto result =
+      nvtext::subword_tokenize(input, *vocab, max_sequence, stride, do_lower, do_truncate);
+  });
+}
 
-// BENCHMARK_MAIN();
+NVBENCH_BENCH(bench_subword_tokenizer)
+  .set_name("subword_tokenize")
+  .add_int64_axis("num_rows", {32768, 262144, 2097152});
@@ -31,17 +31,13 @@
 static void bench_tokenize(nvbench::state& state)
 {
   auto const num_rows      = static_cast<cudf::size_type>(state.get_int64("num_rows"));
-  auto const row_width     = static_cast<cudf::size_type>(state.get_int64("row_width"));
+  auto const min_width     = static_cast<cudf::size_type>(state.get_int64("min_width"));
+  auto const max_width     = static_cast<cudf::size_type>(state.get_int64("max_width"));
   auto const tokenize_type = state.get_string("type");
 
-  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
-      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
-    state.skip("Skip benchmarks greater than size_type limit");
-  }
-
   data_profile const profile =
     data_profile_builder()
-      .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width)
+      .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width)
       .no_validity();
   auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
   cudf::strings_column_view input(column->view());
@@ -82,6 +78,7 @@ static void bench_tokenize(nvbench::state& state)
 
 NVBENCH_BENCH(bench_tokenize)
   .set_name("tokenize")
-  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
-  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
+  .add_int64_axis("min_width", {0})
+  .add_int64_axis("max_width", {32, 64, 128, 256})
+  .add_int64_axis("num_rows", {32768, 262144, 2097152})
   .add_string_axis("type", {"whitespace", "multi", "count", "count_multi", "ngrams", "characters"});
@@ -33,16 +33,12 @@ static void bench_vocab_tokenize(nvbench::state& state)
 {
   auto const stream    = cudf::get_default_stream();
   auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
-  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
+  auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
+  auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));
 
-  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
-      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
-    state.skip("Skip benchmarks greater than size_type limit");
-  }
-
-  auto const column = [num_rows, row_width] {
+  auto const column = [num_rows, min_width, max_width] {
     data_profile const profile = data_profile_builder().no_validity().distribution(
-      cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+      cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
     auto const col = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
     return cudf::strings::filter_characters_of_type(
       cudf::strings_column_view(col->view()),
@@ -85,5 +81,6 @@ static void bench_vocab_tokenize(nvbench::state& state)
 
 NVBENCH_BENCH(bench_vocab_tokenize)
   .set_name("vocab_tokenize")
-  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
-  .add_int64_axis("num_rows", {262144, 524288, 1048576, 2097152, 4194304, 16777216});
+  .add_int64_axis("min_width", {0})
+  .add_int64_axis("max_width", {32, 64, 128, 256})
+  .add_int64_axis("num_rows", {32768, 262144, 2097152});