diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 95f0db895a8..daebf42f531 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -272,12 +272,11 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.c
 
 # ##################################################################################################
 # * nvtext benchmark -------------------------------------------------------------------
-ConfigureBench(
-  TEXT_BENCH text/ngrams.cpp text/normalize.cpp text/normalize_spaces.cpp text/replace.cpp
-  text/subword.cpp text/tokenize.cpp
-)
+ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp)
 
-ConfigureNVBench(TEXT_NVBENCH text/minhash.cpp)
+ConfigureNVBench(
+  TEXT_NVBENCH text/minhash.cpp text/normalize.cpp text/replace.cpp text/tokenize.cpp
+)
 
 # ##################################################################################################
 # * strings benchmark -------------------------------------------------------------------
diff --git a/cpp/benchmarks/text/normalize.cpp b/cpp/benchmarks/text/normalize.cpp
index 733f2da8b2a..6878fa4f8b6 100644
--- a/cpp/benchmarks/text/normalize.cpp
+++ b/cpp/benchmarks/text/normalize.cpp
@@ -16,7 +16,6 @@
 
 #include <benchmarks/common/generate_input.hpp>
 #include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/strings/strings_column_view.hpp>
@@ -24,51 +23,48 @@
 
 #include <nvtext/normalize.hpp>
 
-class TextNormalize : public cudf::benchmark {};
+#include <nvbench/nvbench.cuh>
 
-static void BM_normalize(benchmark::State& state, bool to_lower)
+// Benchmarks nvtext::normalize_spaces and nvtext::normalize_characters on random strings.
+// The "type" axis selects the call: "spaces" runs normalize_spaces; any other value runs
+// normalize_characters, passing true as its second argument only for "to_lower".
+static void bench_normalize(nvbench::state& state)
 {
-  auto const n_rows          = static_cast<cudf::size_type>(state.range(0));
-  auto const max_str_length  = static_cast<cudf::size_type>(state.range(1));
+  auto const num_rows       = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const row_width      = static_cast<cudf::size_type>(state.get_int64("row_width"));
+  auto const normalize_type = state.get_string("type");
+
+  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
+    // skip() does not exit this function; return so the oversized column is never generated
+    return;
+  }
+
   data_profile const profile = data_profile_builder().distribution(
-    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
-  auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+  auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
   cudf::strings_column_view input(column->view());
 
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::get_default_stream());
-    nvtext::normalize_characters(input, to_lower);
-  }
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
 
-  state.SetBytesProcessed(state.iterations() * input.chars_size());
-}
+  auto chars_size = input.chars_size();
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);
+  state.add_global_memory_writes<nvbench::int8_t>(chars_size);
 
-static void generate_bench_args(benchmark::internal::Benchmark* b)
-{
-  int const min_rows   = 1 << 12;
-  int const max_rows   = 1 << 24;
-  int const row_mult   = 8;
-  int const min_rowlen = 1 << 5;
-  int const max_rowlen = 1 << 13;
-  int const len_mult   = 4;
-  for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) {
-    for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) {
-      // avoid generating combinations that exceed the cudf column limit
-      size_t total_chars = static_cast<size_t>(row_count) * rowlen * 4;
-      if (total_chars < static_cast<size_t>(std::numeric_limits<cudf::size_type>::max())) {
-        b->Args({row_count, rowlen});
-      }
-    }
+  if (normalize_type == "spaces") {
+    state.exec(nvbench::exec_tag::sync,
+               [&](nvbench::launch& launch) { auto result = nvtext::normalize_spaces(input); });
+  } else {
+    bool const to_lower = (normalize_type == "to_lower");
+    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+      auto result = nvtext::normalize_characters(input, to_lower);
+    });
   }
 }
 
-#define NVTEXT_BENCHMARK_DEFINE(name, lower)             \
-  BENCHMARK_DEFINE_F(TextNormalize, name)                \
-  (::benchmark::State & st) { BM_normalize(st, lower); } \
-  BENCHMARK_REGISTER_F(TextNormalize, name)              \
-    ->Apply(generate_bench_args)                         \
-    ->UseManualTime()                                    \
-    ->Unit(benchmark::kMillisecond);
-
-NVTEXT_BENCHMARK_DEFINE(characters, false)
-NVTEXT_BENCHMARK_DEFINE(to_lower, true)
+NVBENCH_BENCH(bench_normalize)
+  .set_name("normalize")
+  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
+  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
+  .add_string_axis("type", {"spaces", "characters", "to_lower"});
diff --git a/cpp/benchmarks/text/normalize_spaces.cpp b/cpp/benchmarks/text/normalize_spaces.cpp
deleted file mode 100644
index 82d9316e25b..00000000000
--- a/cpp/benchmarks/text/normalize_spaces.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/string/string_bench_args.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
-
-#include <cudf/scalar/scalar.hpp>
-#include <cudf/strings/strings_column_view.hpp>
-#include <cudf/utilities/default_stream.hpp>
-
-#include <nvtext/normalize.hpp>
-
-class TextNormalize : public cudf::benchmark {};
-
-static void BM_normalize(benchmark::State& state)
-{
-  auto const n_rows          = static_cast<cudf::size_type>(state.range(0));
-  auto const max_str_length  = static_cast<cudf::size_type>(state.range(1));
-  data_profile const profile = data_profile_builder().distribution(
-    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
-  auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
-  cudf::strings_column_view input(column->view());
-
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::get_default_stream());
-    nvtext::normalize_spaces(input);
-  }
-
-  state.SetBytesProcessed(state.iterations() * input.chars_size());
-}
-
-static void generate_bench_args(benchmark::internal::Benchmark* b)
-{
-  int const min_rows   = 1 << 12;
-  int const max_rows   = 1 << 24;
-  int const row_mult   = 8;
-  int const min_rowlen = 1 << 5;
-  int const max_rowlen = 1 << 13;
-  int const len_mult   = 4;
-  generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult);
-}
-
-#define NVTEXT_BENCHMARK_DEFINE(name)             \
-  BENCHMARK_DEFINE_F(TextNormalize, name)         \
-  (::benchmark::State & st) { BM_normalize(st); } \
-  BENCHMARK_REGISTER_F(TextNormalize, name)       \
-    ->Apply(generate_bench_args)                  \
-    ->UseManualTime()                             \
-    ->Unit(benchmark::kMillisecond);
-
-NVTEXT_BENCHMARK_DEFINE(spaces)
diff --git a/cpp/benchmarks/text/replace.cpp b/cpp/benchmarks/text/replace.cpp
index 21d69c4d40e..257f62aa728 100644
--- a/cpp/benchmarks/text/replace.cpp
+++ b/cpp/benchmarks/text/replace.cpp
@@ -15,8 +15,6 @@
  */
 
 #include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/string/string_bench_args.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf_test/column_wrapper.hpp>
 
@@ -24,14 +22,23 @@
 
 #include <nvtext/replace.hpp>
 
-#include <random>
+#include <nvbench/nvbench.cuh>
 
-class TextReplace : public cudf::benchmark {};
+#include <random>
 
-static void BM_replace(benchmark::State& state)
+// Benchmarks nvtext::replace_tokens. Each input string is a suffix of a single row of random
+// words, starting at a random offset in [0, 16].
+static void bench_replace(nvbench::state& state)
 {
-  auto const n_rows   = static_cast<cudf::size_type>(state.range(0));
-  auto const n_length = static_cast<cudf::size_type>(state.range(1));
+  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
+
+  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
+    // skip() does not exit this function; return so the oversized column is never built
+    return;
+  }
 
   std::vector<std::string> words{" ",        "one  ",    "two ",       "three ",     "four ",
                                  "five ",    "six  ",    "sevén  ",    "eight ",     "nine ",
@@ -41,46 +48,32 @@ static void BM_replace(benchmark::State& state)
   std::default_random_engine generator;
   std::uniform_int_distribution<int> tokens_dist(0, words.size() - 1);
   std::string row;  // build a row of random tokens
-  while (static_cast<int>(row.size()) < n_length)
+  while (static_cast<cudf::size_type>(row.size()) < row_width)
     row += words[tokens_dist(generator)];
 
   std::uniform_int_distribution<int> position_dist(0, 16);
 
   auto elements = cudf::detail::make_counting_transform_iterator(
     0, [&](auto idx) { return row.c_str() + position_dist(generator); });
-  cudf::test::strings_column_wrapper input(elements, elements + n_rows);
+  cudf::test::strings_column_wrapper input(elements, elements + num_rows);
   cudf::strings_column_view view(input);
 
   cudf::test::strings_column_wrapper targets({"one", "two", "sevén", "zero"});
   cudf::test::strings_column_wrapper replacements({"1", "2", "7", "0"});
 
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    nvtext::replace_tokens(
-      view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements));
-  }
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
 
-  state.SetBytesProcessed(state.iterations() * view.chars_size());
-}
+  auto chars_size = view.chars_size();
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);
+  state.add_global_memory_writes<nvbench::int8_t>(chars_size);
 
-static void generate_bench_args(benchmark::internal::Benchmark* b)
-{
-  int const min_rows          = 1 << 12;
-  int const max_rows          = 1 << 24;
-  int const row_multiplier    = 8;
-  int const min_row_length    = 1 << 5;
-  int const max_row_length    = 1 << 13;
-  int const length_multiplier = 4;
-  generate_string_bench_args(
-    b, min_rows, max_rows, row_multiplier, min_row_length, max_row_length, length_multiplier);
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    auto result = nvtext::replace_tokens(
+      view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements));
+  });
 }
 
-#define NVTEXT_BENCHMARK_DEFINE(name)           \
-  BENCHMARK_DEFINE_F(TextReplace, name)         \
-  (::benchmark::State & st) { BM_replace(st); } \
-  BENCHMARK_REGISTER_F(TextReplace, name)       \
-    ->Apply(generate_bench_args)                \
-    ->UseManualTime()                           \
-    ->Unit(benchmark::kMillisecond);
-
-NVTEXT_BENCHMARK_DEFINE(replace)
+NVBENCH_BENCH(bench_replace)
+  .set_name("replace")
+  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
+  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216});
diff --git a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp
index bd80af08a74..423fe667b05 100644
--- a/cpp/benchmarks/text/tokenize.cpp
+++ b/cpp/benchmarks/text/tokenize.cpp
@@ -16,8 +16,6 @@
 
 #include <benchmarks/common/generate_input.hpp>
 #include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/string/string_bench_args.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf_test/column_wrapper.hpp>
 
@@ -28,73 +26,62 @@
 #include <nvtext/ngrams_tokenize.hpp>
 #include <nvtext/tokenize.hpp>
 
-class TextTokenize : public cudf::benchmark {};
+#include <nvbench/nvbench.cuh>
 
-enum class tokenize_type { single, multi, count, count_multi, ngrams, characters };
-
-static void BM_tokenize(benchmark::State& state, tokenize_type tt)
+// Benchmarks the nvtext tokenize APIs on random strings. The "type" axis selects the call:
+// tokenize, count_tokens (each with default or {" ", "+", "-"} delimiters), ngrams_tokenize,
+// or character_tokenize.
+static void bench_tokenize(nvbench::state& state)
 {
-  auto const n_rows          = static_cast<cudf::size_type>(state.range(0));
-  auto const max_str_length  = static_cast<cudf::size_type>(state.range(1));
+  auto const num_rows      = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const row_width     = static_cast<cudf::size_type>(state.get_int64("row_width"));
+  auto const tokenize_type = state.get_string("type");
+
+  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
+    // skip() does not exit this function; return so the oversized column is never generated
+    return;
+  }
+
   data_profile const profile = data_profile_builder().distribution(
-    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
-  auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+  auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
   cudf::strings_column_view input(column->view());
-  cudf::test::strings_column_wrapper delimiters({" ", "+", "-"});
 
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::get_default_stream());
-    switch (tt) {
-      case tokenize_type::single:
-        // single whitespace delimiter
-        nvtext::tokenize(input);
-        break;
-      case tokenize_type::multi:
-        nvtext::tokenize(input, cudf::strings_column_view(delimiters));
-        break;
-      case tokenize_type::count:
-        // single whitespace delimiter
-        nvtext::count_tokens(input);
-        break;
-      case tokenize_type::count_multi:
-        nvtext::count_tokens(input, cudf::strings_column_view(delimiters));
-        break;
-      case tokenize_type::ngrams:
-        // default is bigrams
-        nvtext::ngrams_tokenize(input);
-        break;
-      case tokenize_type::characters:
-        // every character becomes a string
-        nvtext::character_tokenize(input);
-        break;
-    }
-  }
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
 
-  state.SetBytesProcessed(state.iterations() * input.chars_size());
-}
+  auto chars_size = input.chars_size();
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);
+  state.add_global_memory_writes<nvbench::int8_t>(chars_size);
 
-static void generate_bench_args(benchmark::internal::Benchmark* b)
-{
-  int const min_rows   = 1 << 12;
-  int const max_rows   = 1 << 24;
-  int const row_mult   = 8;
-  int const min_rowlen = 1 << 5;
-  int const max_rowlen = 1 << 13;
-  int const len_mult   = 4;
-  generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult);
+  if (tokenize_type == "whitespace") {
+    state.exec(nvbench::exec_tag::sync,
+               [&](nvbench::launch& launch) { auto result = nvtext::tokenize(input); });
+  } else if (tokenize_type == "multi") {
+    cudf::test::strings_column_wrapper delimiters({" ", "+", "-"});
+    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+      auto result = nvtext::tokenize(input, cudf::strings_column_view(delimiters));
+    });
+  } else if (tokenize_type == "count") {
+    state.exec(nvbench::exec_tag::sync,
+               [&](nvbench::launch& launch) { auto result = nvtext::count_tokens(input); });
+  } else if (tokenize_type == "count_multi") {
+    cudf::test::strings_column_wrapper delimiters({" ", "+", "-"});
+    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+      auto result = nvtext::count_tokens(input, cudf::strings_column_view(delimiters));
+    });
+  } else if (tokenize_type == "ngrams") {
+    state.exec(nvbench::exec_tag::sync,
+               [&](nvbench::launch& launch) { auto result = nvtext::ngrams_tokenize(input); });
+  } else if (tokenize_type == "characters") {
+    state.exec(nvbench::exec_tag::sync,
+               [&](nvbench::launch& launch) { auto result = nvtext::character_tokenize(input); });
+  }
 }
 
-#define NVTEXT_BENCHMARK_DEFINE(name)                                 \
-  BENCHMARK_DEFINE_F(TextTokenize, name)                              \
-  (::benchmark::State & st) { BM_tokenize(st, tokenize_type::name); } \
-  BENCHMARK_REGISTER_F(TextTokenize, name)                            \
-    ->Apply(generate_bench_args)                                      \
-    ->UseManualTime()                                                 \
-    ->Unit(benchmark::kMillisecond);
-
-NVTEXT_BENCHMARK_DEFINE(single)
-NVTEXT_BENCHMARK_DEFINE(multi)
-NVTEXT_BENCHMARK_DEFINE(count)
-NVTEXT_BENCHMARK_DEFINE(count_multi)
-NVTEXT_BENCHMARK_DEFINE(ngrams)
-NVTEXT_BENCHMARK_DEFINE(characters)
+NVBENCH_BENCH(bench_tokenize)
+  .set_name("tokenize")
+  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
+  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
+  .add_string_axis("type", {"whitespace", "multi", "count", "count_multi", "ngrams", "characters"});