diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 4e20c979f6c..eae915c47fe 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -137,6 +137,7 @@ test: - test -f $PREFIX/include/cudf/io/orc_metadata.hpp - test -f $PREFIX/include/cudf/io/orc.hpp - test -f $PREFIX/include/cudf/io/parquet.hpp + - test -f $PREFIX/include/cudf/io/text/byte_range_info.hpp - test -f $PREFIX/include/cudf/io/text/data_chunk_source_factories.hpp - test -f $PREFIX/include/cudf/io/text/data_chunk_source.hpp - test -f $PREFIX/include/cudf/io/text/detail/multistate.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2ffd62f1b53..825ea37c6ac 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -311,6 +311,7 @@ add_library( src/io/parquet/writer_impl.cu src/io/statistics/orc_column_statistics.cu src/io/statistics/parquet_column_statistics.cu + src/io/text/byte_range_info.cpp src/io/text/multibyte_split.cu src/io/utilities/column_buffer.cpp src/io/utilities/config_utils.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 99aeff0df93..054410c3265 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -276,7 +276,7 @@ ConfigureBench( # ################################################################################################## # * json benchmark ------------------------------------------------------------------- -ConfigureBench(JSON_BENCH string/json.cpp) +ConfigureBench(JSON_BENCH string/json.cu) # ################################################################################################## # * io benchmark --------------------------------------------------------------------- diff --git a/cpp/benchmarks/common/generate_input.cpp b/cpp/benchmarks/common/generate_input.cpp index d6564428a2e..6330beda54c 100644 --- a/cpp/benchmarks/common/generate_input.cpp +++ b/cpp/benchmarks/common/generate_input.cpp @@ -122,7 +122,7 @@ struct random_value_fn; * @brief Creates an random timestamp/duration value */ template -struct random_value_fn()>> { +struct random_value_fn()>> { std::function seconds_gen; std::function nanoseconds_gen; @@ -164,7 +164,7 @@ struct random_value_fn()>> { * @brief Creates an random fixed_point value. Not implemented yet. */ template -struct random_value_fn()>> { +struct random_value_fn()>> { using rep = typename T::rep; rep const lower_bound; rep const upper_bound; @@ -194,9 +194,7 @@ struct random_value_fn()>> * @brief Creates an random numeric value with the given distribution. */ template -struct random_value_fn< - T, - typename std::enable_if_t && cudf::is_numeric()>> { +struct random_value_fn && cudf::is_numeric()>> { T const lower_bound; T const upper_bound; distribution_fn dist; @@ -219,7 +217,7 @@ struct random_value_fn< * @brief Creates an boolean value with given probability of returning `true`. 
*/ template -struct random_value_fn>> { +struct random_value_fn>> { std::bernoulli_distribution b_dist; random_value_fn(distribution_params const& desc) : b_dist{desc.probability_true} {} @@ -260,7 +258,7 @@ struct stored_as { // Use `int8_t` for bools because that's how they're stored in columns template -struct stored_as>> { +struct stored_as>> { using type = int8_t; }; diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp index 17bd650e722..43fee5c50a7 100644 --- a/cpp/benchmarks/common/generate_input.hpp +++ b/cpp/benchmarks/common/generate_input.hpp @@ -128,9 +128,7 @@ struct distribution_params; * @brief Numeric values are parameterized with a distribution type and bounds of the same type. */ template -struct distribution_params< - T, - typename std::enable_if_t && cudf::is_numeric()>> { +struct distribution_params && cudf::is_numeric()>> { distribution_id id; T lower_bound; T upper_bound; @@ -140,7 +138,7 @@ struct distribution_params< * @brief Booleans are parameterized with the probability of getting `true` value. */ template -struct distribution_params>> { +struct distribution_params>> { double probability_true; }; @@ -148,7 +146,7 @@ struct distribution_params> * @brief Timestamps and durations are parameterized with a distribution type and int64_t bounds. */ template -struct distribution_params()>> { +struct distribution_params()>> { distribution_id id; int64_t lower_bound; int64_t upper_bound; @@ -158,7 +156,7 @@ struct distribution_params()>> { * @brief Strings are parameterized by the distribution of their length, as an integral value. */ template -struct distribution_params>> { +struct distribution_params>> { distribution_params length_params; }; @@ -167,7 +165,7 @@ struct distribution_params -struct distribution_params>> { +struct distribution_params>> { cudf::type_id element_type; distribution_params length_params; cudf::size_type max_depth; @@ -175,7 +173,7 @@ struct distribution_params -struct distribution_params()>> { +struct distribution_params()>> { }; /** @@ -225,8 +223,7 @@ class data_profile { public: template && cuda::std::is_integral_v, T>* = - nullptr> + std::enable_if_t && cuda::std::is_integral_v, T>* = nullptr> distribution_params get_distribution_params() const { auto it = int_params.find(cudf::type_to_id()); @@ -239,7 +236,7 @@ class data_profile { } } - template , T>* = nullptr> + template , T>* = nullptr> distribution_params get_distribution_params() const { auto it = float_params.find(cudf::type_to_id()); @@ -258,7 +255,7 @@ class data_profile { return distribution_params{bool_probability}; } - template ()>* = nullptr> + template ()>* = nullptr> distribution_params get_distribution_params() const { auto it = int_params.find(cudf::type_to_id()); @@ -284,7 +281,7 @@ class data_profile { return list_dist_desc; } - template ()>* = nullptr> + template ()>* = nullptr> distribution_params get_distribution_params() const { using rep = typename T::rep; @@ -307,7 +304,7 @@ class data_profile { // discrete distributions (integers, strings, lists). Otherwise the call with have no effect. template , T>* = nullptr> + std::enable_if_t, T>* = nullptr> void set_distribution_params(Type_enum type_or_group, distribution_id dist, T lower_bound, @@ -331,7 +328,7 @@ class data_profile { // have continuous distributions (floating point types). Otherwise the call with have no effect. 
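Most of the churn in these benchmark headers (and across the rest of this patch) is a mechanical modernization: typename std::enable_if<...>::type and the redundant typename std::enable_if_t<...> spellings become the plain C++14 alias std::enable_if_t<...>. A minimal standalone sketch of the before/after spelling, using hypothetical function names rather than code from the patch:

#include <type_traits>

// Old spelling: name std::enable_if's ::type member, which needs a leading
// `typename` in dependent contexts.
template <typename T,
          typename std::enable_if<std::is_integral<T>::value, T>::type* = nullptr>
T square_old(T v)
{
  return v * v;
}

// New spelling used throughout the patch: std::enable_if_t is already a type,
// so the typename/::type boilerplate disappears.
template <typename T, std::enable_if_t<std::is_integral_v<T>, T>* = nullptr>
T square_new(T v)
{
  return v * v;
}

int main() { return square_old(3) + square_new(4) == 25 ? 0 : 1; }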
template , T>* = nullptr> + std::enable_if_t, T>* = nullptr> void set_distribution_params(Type_enum type_or_group, distribution_id dist, T lower_bound, diff --git a/cpp/benchmarks/common/random_distribution_factory.hpp b/cpp/benchmarks/common/random_distribution_factory.hpp index df2b6e0a754..f2f3833f15d 100644 --- a/cpp/benchmarks/common/random_distribution_factory.hpp +++ b/cpp/benchmarks/common/random_distribution_factory.hpp @@ -24,7 +24,7 @@ /** * @brief Generates a normal(binomial) distribution between zero and upper_bound. */ -template , T>* = nullptr> +template , T>* = nullptr> auto make_normal_dist(T upper_bound) { using uT = typename std::make_unsigned::type; @@ -42,7 +42,7 @@ auto make_normal_dist(T upper_bound) return std::normal_distribution(mean, stddev); } -template , T>* = nullptr> +template , T>* = nullptr> auto make_uniform_dist(T range_start, T range_end) { return std::uniform_int_distribution(range_start, range_end); @@ -62,7 +62,7 @@ double geometric_dist_p(T range_size) return p ? p : std::numeric_limits::epsilon(); } -template , T>* = nullptr> +template , T>* = nullptr> auto make_geometric_dist(T range_start, T range_end) { using uT = typename std::make_unsigned::type; @@ -82,7 +82,7 @@ auto make_geometric_dist(T range_start, T range_end) template using distribution_fn = std::function; -template , T>* = nullptr> +template , T>* = nullptr> distribution_fn make_distribution(distribution_id did, T lower_bound, T upper_bound) { switch (did) { diff --git a/cpp/benchmarks/string/json.cpp b/cpp/benchmarks/string/json.cpp deleted file mode 100644 index 1ade4d01e1e..00000000000 --- a/cpp/benchmarks/string/json.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -#include -#include - -#include -#include - -class JsonPath : public cudf::benchmark { -}; - -float frand() { return static_cast(rand()) / static_cast(RAND_MAX); } - -int rand_range(int min, int max) { return min + static_cast(frand() * (max - min)); } - -std::vector Books{ - "{\n\"category\": \"reference\",\n\"author\": \"Nigel Rees\",\n\"title\": \"Sayings of the " - "Century\",\n\"price\": 8.95\n}", - "{\n\"category\": \"fiction\",\n\"author\": \"Evelyn Waugh\",\n\"title\": \"Sword of " - "Honour\",\n\"price\": 12.99\n}", - "{\n\"category\": \"fiction\",\n\"author\": \"Herman Melville\",\n\"title\": \"Moby " - "Dick\",\n\"isbn\": \"0-553-21311-3\",\n\"price\": 8.99\n}", - "{\n\"category\": \"fiction\",\n\"author\": \"J. R. R. 
Tolkien\",\n\"title\": \"The Lord of the " - "Rings\",\n\"isbn\": \"0-395-19395-8\",\n\"price\": 22.99\n}"}; -constexpr int Approx_book_size = 110; -std::vector Bicycles{ - "{\"color\": \"red\", \"price\": 9.95}", - "{\"color\": \"green\", \"price\": 29.95}", - "{\"color\": \"blue\", \"price\": 399.95}", - "{\"color\": \"yellow\", \"price\": 99.95}", - "{\"color\": \"mauve\", \"price\": 199.95}", -}; -constexpr int Approx_bicycle_size = 33; -std::string Misc{"\n\"expensive\": 10\n"}; -std::string generate_field(std::vector const& values, int num_values) -{ - std::string res; - for (int idx = 0; idx < num_values; idx++) { - if (idx > 0) { res += std::string(",\n"); } - int vindex = std::min(static_cast(floor(frand() * values.size())), - static_cast(values.size() - 1)); - res += values[vindex]; - } - return res; -} - -std::string build_row(int desired_bytes) -{ - // always have at least 2 books and 2 bikes - int num_books = 2; - int num_bicycles = 2; - int remaining_bytes = - desired_bytes - ((num_books * Approx_book_size) + (num_bicycles * Approx_bicycle_size)); - - // divide up the remainder between books and bikes - float book_pct = frand(); - float bicycle_pct = 1.0f - book_pct; - num_books += (remaining_bytes * book_pct) / Approx_book_size; - num_bicycles += (remaining_bytes * bicycle_pct) / Approx_bicycle_size; - - std::string books = "\"book\": [\n" + generate_field(Books, num_books) + "]\n"; - std::string bicycles = "\"bicycle\": [\n" + generate_field(Bicycles, num_bicycles) + "]\n"; - - std::string store = "\"store\": {\n"; - if (frand() <= 0.5f) { - store += books + std::string(",\n") + bicycles; - } else { - store += bicycles + std::string(",\n") + books; - } - store += std::string("}\n"); - - std::string row = std::string("{\n"); - if (frand() <= 0.5f) { - row += store + std::string(",\n") + Misc; - } else { - row += Misc + std::string(",\n") + store; - } - row += std::string("}\n"); - return row; -} - -template -static void BM_case(benchmark::State& state, QueryArg&&... query_arg) -{ - srand(5236); - auto iter = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - [desired_bytes = state.range(1)](int index) { return build_row(desired_bytes); }); - int num_rows = state.range(0); - cudf::test::strings_column_wrapper input(iter, iter + num_rows); - cudf::strings_column_view scv(input); - size_t num_chars = scv.chars().size(); - - std::string json_path(query_arg...); - - for (auto _ : state) { - cuda_event_timer raii(state, true); - auto result = cudf::strings::get_json_object(scv, json_path); - cudaStreamSynchronize(0); - } - - // this isn't strictly 100% accurate. a given query isn't necessarily - // going to visit every single incoming character. but in spirit it does. 
- state.SetBytesProcessed(state.iterations() * num_chars); -} - -#define JSON_BENCHMARK_DEFINE(name, query) \ - BENCHMARK_CAPTURE(BM_case, name, query) \ - ->ArgsProduct({{100, 1000, 100000, 400000}, {300, 600, 4096}}) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -JSON_BENCHMARK_DEFINE(query0, "$"); -JSON_BENCHMARK_DEFINE(query1, "$.store"); -JSON_BENCHMARK_DEFINE(query2, "$.store.book"); -JSON_BENCHMARK_DEFINE(query3, "$.store.*"); -JSON_BENCHMARK_DEFINE(query4, "$.store.book[*]"); -JSON_BENCHMARK_DEFINE(query5, "$.store.book[*].category"); -JSON_BENCHMARK_DEFINE(query6, "$.store['bicycle']"); -JSON_BENCHMARK_DEFINE(query7, "$.store.book[*]['isbn']"); -JSON_BENCHMARK_DEFINE(query8, "$.store.bicycle[1]"); diff --git a/cpp/benchmarks/string/json.cu b/cpp/benchmarks/string/json.cu new file mode 100644 index 00000000000..69c42f97d7f --- /dev/null +++ b/cpp/benchmarks/string/json.cu @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include + +class JsonPath : public cudf::benchmark { +}; + +const std::vector Books{ + R"json({ +"category": "reference", +"author": "Nigel Rees", +"title": "Sayings of the Century", +"price": 8.95 +})json", + R"json({ +"category": "fiction", +"author": "Evelyn Waugh", +"title": "Sword of Honour", +"price": 12.99 +})json", + R"json({ +"category": "fiction", +"author": "Herman Melville", +"title": "Moby Dick", +"isbn": "0-553-21311-3", +"price": 8.99 +})json", + R"json({ +"category": "fiction", +"author": "J. R. R. 
Tolkien", +"title": "The Lord of the Rings", +"isbn": "0-395-19395-8", +"price": 22.99 +})json"}; +constexpr int Approx_book_size = 110; +const std::vector Bicycles{ + R"json({"color": "red", "price": 9.95})json", + R"json({"color": "green", "price": 29.95})json", + R"json({"color": "blue", "price": 399.95})json", + R"json({"color": "yellow", "price": 99.95})json", + R"json({"color": "mauve", "price": 199.95})json", +}; +constexpr int Approx_bicycle_size = 33; +std::string Misc{"\n\"expensive\": 10\n"}; + +struct json_benchmark_row_builder { + int const desired_bytes; + cudf::size_type const num_rows; + cudf::column_device_view const d_books_bicycles[2]; // Books, Bicycles strings + cudf::column_device_view const d_book_pct; // Book percentage + cudf::column_device_view const d_misc_order; // Misc-Store order + cudf::column_device_view const d_store_order; // Books-Bicycles order + int32_t* d_offsets{}; + char* d_chars{}; + thrust::minstd_rand rng{5236}; + thrust::uniform_int_distribution dist{}; + + // internal data structure for {bytes, out_ptr} with operator+= + struct bytes_and_ptr { + cudf::size_type bytes; + char* ptr; + __device__ bytes_and_ptr& operator+=(cudf::string_view const& str_append) + { + bytes += str_append.size_bytes(); + if (ptr) { ptr = cudf::strings::detail::copy_string(ptr, str_append); } + return *this; + } + }; + + __device__ inline void copy_items(int this_idx, + cudf::size_type num_items, + bytes_and_ptr& output_str) + { + using param_type = thrust::uniform_int_distribution::param_type; + dist.param(param_type{0, d_books_bicycles[this_idx].size() - 1}); + cudf::string_view comma(",\n", 2); + for (int i = 0; i < num_items; i++) { + if (i > 0) { output_str += comma; } + int idx = dist(rng); + auto item = d_books_bicycles[this_idx].element(idx); + output_str += item; + } + } + + __device__ void operator()(cudf::size_type idx) + { + int num_books = 2; + int num_bicycles = 2; + int remaining_bytes = max( + 0, desired_bytes - ((num_books * Approx_book_size) + (num_bicycles * Approx_bicycle_size))); + + // divide up the remainder between books and bikes + auto book_pct = d_book_pct.element(idx); + // {Misc, store} OR {store, Misc} + // store: {books, bicycles} OR store: {bicycles, books} + float bicycle_pct = 1.0f - book_pct; + num_books += (remaining_bytes * book_pct) / Approx_book_size; + num_bicycles += (remaining_bytes * bicycle_pct) / Approx_bicycle_size; + + char* out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; + bytes_and_ptr output_str{0, out_ptr}; + // + cudf::string_view comma(",\n", 2); + cudf::string_view brace1("{\n", 2); + cudf::string_view store_member_start[2]{{"\"book\": [\n", 10}, {"\"bicycle\": [\n", 13}}; + cudf::string_view store("\"store\": {\n", 11); + cudf::string_view Misc{"\"expensive\": 10", 15}; + cudf::string_view brace2("\n}", 2); + cudf::string_view square2{"\n]", 2}; + + output_str += brace1; + if (d_misc_order.element(idx)) { // Misc. first. + output_str += Misc; + output_str += comma; + } + output_str += store; + for (int store_order = 0; store_order < 2; store_order++) { + if (store_order > 0) { output_str += comma; } + int this_idx = (d_store_order.element(idx) == store_order); + auto& mem_start = store_member_start[this_idx]; + output_str += mem_start; + copy_items(this_idx, this_idx == 0 ? num_books : num_bicycles, output_str); + output_str += square2; + } + output_str += brace2; + if (!d_misc_order.element(idx)) { // Misc, if not first. 
+ output_str += comma; + output_str += Misc; + } + output_str += brace2; + if (!output_str.ptr) d_offsets[idx] = output_str.bytes; + } +}; + +auto build_json_string_column(int desired_bytes, int num_rows) +{ + data_profile profile; + profile.set_cardinality(0); + profile.set_null_frequency(-0.1); + profile.set_distribution_params( + cudf::type_id::FLOAT32, distribution_id::UNIFORM, 0.0, 1.0); + auto float_2bool_columns = + create_random_table({cudf::type_id::FLOAT32, cudf::type_id::BOOL8, cudf::type_id::BOOL8}, + row_count{num_rows}, + profile); + + cudf::test::strings_column_wrapper books(Books.begin(), Books.end()); + cudf::test::strings_column_wrapper bicycles(Bicycles.begin(), Bicycles.end()); + auto d_books = cudf::column_device_view::create(books); + auto d_bicycles = cudf::column_device_view::create(bicycles); + auto d_book_pct = cudf::column_device_view::create(float_2bool_columns->get_column(0)); + auto d_misc_order = cudf::column_device_view::create(float_2bool_columns->get_column(1)); + auto d_store_order = cudf::column_device_view::create(float_2bool_columns->get_column(2)); + json_benchmark_row_builder jb{ + desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order}; + auto children = cudf::strings::detail::make_strings_children(jb, num_rows); + return cudf::make_strings_column( + num_rows, std::move(children.first), std::move(children.second), 0, {}); +} + +void BM_case(benchmark::State& state, std::string query_arg) +{ + srand(5236); + int num_rows = state.range(0); + int desired_bytes = state.range(1); + auto input = build_json_string_column(desired_bytes, num_rows); + cudf::strings_column_view scv(input->view()); + size_t num_chars = scv.chars().size(); + + std::string json_path(query_arg); + + for (auto _ : state) { + cuda_event_timer raii(state, true); + auto result = cudf::strings::get_json_object(scv, json_path); + cudaStreamSynchronize(0); + } + + // this isn't strictly 100% accurate. a given query isn't necessarily + // going to visit every single incoming character. but in spirit it does. 
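build_json_string_column above relies on the standard two-pass strings-building pattern behind cudf::strings::detail::make_strings_children: the row builder runs once with a null character buffer so it only records per-row byte counts, the counts are scanned into offsets, and it runs again to write the characters. A host-only sketch of that pattern under simplified assumptions (no cudf types; build_row is a stand-in for the device functor):

#include <cstring>
#include <numeric>
#include <string>
#include <vector>

// Stand-in for the row-building functor: with a null chars pointer it only
// reports the row's byte count, otherwise it writes the row at `offset`.
int build_row(int row, char* chars, int offset)
{
  std::string const s = "row-" + std::to_string(row) + "\n";
  if (chars != nullptr) { std::memcpy(chars + offset, s.data(), s.size()); }
  return static_cast<int>(s.size());
}

int main()
{
  int const num_rows = 4;

  // pass 1: sizes only, stored shifted by one so offsets[0] stays 0
  std::vector<int> offsets(num_rows + 1, 0);
  for (int i = 0; i < num_rows; ++i) { offsets[i + 1] = build_row(i, nullptr, 0); }

  // inclusive scan of the shifted sizes yields the usual offsets array
  std::partial_sum(offsets.begin(), offsets.end(), offsets.begin());

  // pass 2: write all characters into one contiguous buffer
  std::vector<char> chars(offsets.back());
  for (int i = 0; i < num_rows; ++i) { build_row(i, chars.data(), offsets[i]); }

  return static_cast<int>(chars.size()) == offsets.back() ? 0 : 1;
}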
+ state.SetBytesProcessed(state.iterations() * num_chars); +} + +#define JSON_BENCHMARK_DEFINE(name, query) \ + BENCHMARK_DEFINE_F(JsonPath, name)(::benchmark::State & state) { BM_case(state, query); } \ + BENCHMARK_REGISTER_F(JsonPath, name) \ + ->ArgsProduct({{100, 1000, 100000, 400000}, {300, 600, 4096}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +JSON_BENCHMARK_DEFINE(query0, "$"); +JSON_BENCHMARK_DEFINE(query1, "$.store"); +JSON_BENCHMARK_DEFINE(query2, "$.store.book"); +JSON_BENCHMARK_DEFINE(query3, "$.store.*"); +JSON_BENCHMARK_DEFINE(query4, "$.store.book[*]"); +JSON_BENCHMARK_DEFINE(query5, "$.store.book[*].category"); +JSON_BENCHMARK_DEFINE(query6, "$.store['bicycle']"); +JSON_BENCHMARK_DEFINE(query7, "$.store.book[*]['isbn']"); +JSON_BENCHMARK_DEFINE(query8, "$.store.bicycle[1]"); diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu index 48b31e5dae7..3be599e8c41 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu @@ -42,7 +42,7 @@ struct Functor { }; template -struct Functor>> { +struct Functor>> { static __device__ Float f(Float x) { if (ft == BANDWIDTH_BOUND) { diff --git a/cpp/include/cudf/detail/calendrical_month_sequence.cuh b/cpp/include/cudf/detail/calendrical_month_sequence.cuh index 00742db7982..321cc3d19ef 100644 --- a/cpp/include/cudf/detail/calendrical_month_sequence.cuh +++ b/cpp/include/cudf/detail/calendrical_month_sequence.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,12 +30,12 @@ namespace cudf { namespace detail { struct calendrical_month_sequence_functor { template - typename std::enable_if_t::value, std::unique_ptr> - operator()(size_type n, - scalar const& input, - size_type months, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::enable_if_t::value, std::unique_ptr> operator()( + size_type n, + scalar const& input, + size_type months, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { // Return empty column if n = 0 if (n == 0) return cudf::make_empty_column(input.type()); @@ -59,8 +59,8 @@ struct calendrical_month_sequence_functor { } template - typename std::enable_if_t::value, std::unique_ptr> - operator()(Args&&...) + std::enable_if_t::value, std::unique_ptr> operator()( + Args&&...) { CUDF_FAIL("Cannot make a date_range of a non-datetime type"); } diff --git a/cpp/include/cudf/detail/reduction.cuh b/cpp/include/cudf/detail/reduction.cuh index 76825285745..e176529ed6d 100644 --- a/cpp/include/cudf/detail/reduction.cuh +++ b/cpp/include/cudf/detail/reduction.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
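The JSON_BENCHMARK_DEFINE macro in the new json.cu above switches from BENCHMARK_CAPTURE to a registered fixture (BENCHMARK_DEFINE_F / BENCHMARK_REGISTER_F) while keeping the same ArgsProduct sweep over row counts and row sizes. A minimal standalone Google Benchmark sketch of that registration pattern, with a hypothetical fixture and a trivial workload in place of get_json_object:

#include <benchmark/benchmark.h>

#include <string>
#include <vector>

class StringFixture : public ::benchmark::Fixture {
};

static void run_case(benchmark::State& state, std::string const& query)
{
  auto const num_rows = state.range(0);
  auto const row_size = state.range(1);
  for (auto _ : state) {
    // stand-in workload: build num_rows strings of roughly row_size bytes
    std::vector<std::string> rows(num_rows, std::string(row_size, 'x'));
    benchmark::DoNotOptimize(rows);
    benchmark::DoNotOptimize(query);
  }
  state.SetBytesProcessed(state.iterations() * num_rows * row_size);
}

#define SKETCH_BENCHMARK_DEFINE(name, query)                          \
  BENCHMARK_DEFINE_F(StringFixture, name)(::benchmark::State & state) \
  {                                                                   \
    run_case(state, query);                                           \
  }                                                                   \
  BENCHMARK_REGISTER_F(StringFixture, name)                           \
    ->ArgsProduct({{100, 1000}, {300, 600}})                          \
    ->Unit(benchmark::kMillisecond);

SKETCH_BENCHMARK_DEFINE(query0, "$");
SKETCH_BENCHMARK_DEFINE(query1, "$.store");

BENCHMARK_MAIN();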
@@ -49,8 +49,8 @@ namespace detail { template ::type, - typename std::enable_if_t() && - not cudf::is_fixed_point()>* = nullptr> + std::enable_if_t() && + not cudf::is_fixed_point()>* = nullptr> std::unique_ptr reduce(InputIterator d_in, cudf::size_type num_items, op::simple_op sop, @@ -92,7 +92,7 @@ std::unique_ptr reduce(InputIterator d_in, template ::type, - typename std::enable_if_t()>* = nullptr> + std::enable_if_t()>* = nullptr> std::unique_ptr reduce(InputIterator d_in, cudf::size_type num_items, op::simple_op sop, @@ -109,7 +109,7 @@ std::unique_ptr reduce(InputIterator d_in, template ::type, - typename std::enable_if_t>* = nullptr> + std::enable_if_t>* = nullptr> std::unique_ptr reduce(InputIterator d_in, cudf::size_type num_items, op::simple_op sop, diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh index 221e90a9816..f985135064f 100644 --- a/cpp/include/cudf/detail/utilities/device_atomics.cuh +++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh @@ -426,7 +426,7 @@ struct typesAtomicCASImpl { * @returns The old value at `address` */ template -typename std::enable_if_t(), T> __forceinline__ __device__ +std::enable_if_t(), T> __forceinline__ __device__ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) { auto fun = cudf::detail::genericAtomicOperationImpl{}; @@ -435,7 +435,7 @@ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) // specialization for cudf::detail::timestamp types template -typename std::enable_if_t(), T> __forceinline__ __device__ +std::enable_if_t(), T> __forceinline__ __device__ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) { using R = typename T::rep; @@ -448,7 +448,7 @@ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) // specialization for cudf::detail::duration types template -typename std::enable_if_t(), T> __forceinline__ __device__ +std::enable_if_t(), T> __forceinline__ __device__ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) { using R = typename T::rep; @@ -616,7 +616,7 @@ __forceinline__ __device__ T atomicCAS(T* address, T compare, T val) * * @returns The old value at `address` */ -template , T>* = nullptr> +template , T>* = nullptr> __forceinline__ __device__ T atomicAnd(T* address, T val) { return cudf::genericAtomicOperation(address, val, cudf::DeviceAnd{}); @@ -637,7 +637,7 @@ __forceinline__ __device__ T atomicAnd(T* address, T val) * * @returns The old value at `address` */ -template , T>* = nullptr> +template , T>* = nullptr> __forceinline__ __device__ T atomicOr(T* address, T val) { return cudf::genericAtomicOperation(address, val, cudf::DeviceOr{}); @@ -658,7 +658,7 @@ __forceinline__ __device__ T atomicOr(T* address, T val) * * @returns The old value at `address` */ -template , T>* = nullptr> +template , T>* = nullptr> __forceinline__ __device__ T atomicXor(T* address, T val) { return cudf::genericAtomicOperation(address, val, cudf::DeviceXor{}); diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 9423cb6b998..87fef5bc187 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -61,27 +61,26 @@ CUDF_HOST_DEVICE inline auto max(LHS const& lhs, RHS const& rhs) * @brief Binary `sum` operator */ struct DeviceSum { - template ()>* = nullptr> + template ()>* = nullptr> CUDF_HOST_DEVICE inline auto operator()(const 
T& lhs, const T& rhs) -> decltype(lhs + rhs) { return lhs + rhs; } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { return T{typename T::duration{0}}; } - template < - typename T, - typename std::enable_if_t() && !cudf::is_fixed_point()>* = nullptr> + template () && !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { return T{0}; } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support device operator identity"); @@ -93,13 +92,13 @@ struct DeviceSum { * @brief `count` operator - used in rolling windows */ struct DeviceCount { - template ()>* = nullptr> + template ()>* = nullptr> CUDF_HOST_DEVICE inline T operator()(const T& lhs, const T& rhs) { return T{DeviceCount{}(lhs.time_since_epoch(), rhs.time_since_epoch())}; } - template ()>* = nullptr> + template ()>* = nullptr> CUDF_HOST_DEVICE inline T operator()(const T&, const T& rhs) { return rhs + T{1}; @@ -123,10 +122,9 @@ struct DeviceMin { return numeric::detail::min(lhs, rhs); } - template < - typename T, - typename std::enable_if_t && !cudf::is_dictionary() && - !cudf::is_fixed_point()>* = nullptr> + template && !cudf::is_dictionary() && + !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { // chrono types do not have std::numeric_limits specializations and should use T::max() @@ -135,7 +133,7 @@ struct DeviceMin { return cuda::std::numeric_limits::max(); } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support DeviceMin identity"); @@ -143,13 +141,13 @@ struct DeviceMin { } // @brief identity specialized for string_view - template >* = nullptr> + template >* = nullptr> CUDF_HOST_DEVICE inline static constexpr T identity() { return string_view::max(); } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { return static_cast(T::max_value()); @@ -167,10 +165,9 @@ struct DeviceMax { return numeric::detail::max(lhs, rhs); } - template < - typename T, - typename std::enable_if_t && !cudf::is_dictionary() && - !cudf::is_fixed_point()>* = nullptr> + template && !cudf::is_dictionary() && + !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { // chrono types do not have std::numeric_limits specializations and should use T::min() @@ -179,20 +176,20 @@ struct DeviceMax { return cuda::std::numeric_limits::lowest(); } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support DeviceMax identity"); return cuda::std::numeric_limits::lowest(); } - template >* = nullptr> + template >* = nullptr> CUDF_HOST_DEVICE inline static constexpr T identity() { return string_view::min(); } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { return static_cast(T::lowest_value()); @@ -203,19 +200,19 @@ struct DeviceMax { * @brief binary `product` operator */ struct DeviceProduct { - template ()>* = nullptr> + template ()>* = nullptr> CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs * rhs) { return lhs * rhs; } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { return T{1}; } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support DeviceProduct identity"); @@ -227,7 +224,7 @@ struct DeviceProduct { * @brief binary `and` operator */ struct DeviceAnd { - 
template >* = nullptr> + template >* = nullptr> CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs & rhs) { return (lhs & rhs); @@ -238,7 +235,7 @@ struct DeviceAnd { * @brief binary `or` operator */ struct DeviceOr { - template >* = nullptr> + template >* = nullptr> CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs | rhs) { return (lhs | rhs); @@ -249,7 +246,7 @@ struct DeviceOr { * @brief binary `xor` operator */ struct DeviceXor { - template >* = nullptr> + template >* = nullptr> CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs ^ rhs) { return (lhs ^ rhs); diff --git a/cpp/include/cudf/io/text/byte_range_info.hpp b/cpp/include/cudf/io/text/byte_range_info.hpp new file mode 100644 index 00000000000..cb2d00f0d1f --- /dev/null +++ b/cpp/include/cudf/io/text/byte_range_info.hpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +namespace cudf { +namespace io { +namespace text { + +/** + * @brief stores offset and size used to indicate a byte range + */ +class byte_range_info { + private: + int64_t _offset; + int64_t _size; + + public: + constexpr byte_range_info() noexcept : _offset(0), _size(0) {} + constexpr byte_range_info(int64_t offset, int64_t size) : _offset(offset), _size(size) + { + CUDF_EXPECTS(offset >= 0, "offset must be non-negative"); + CUDF_EXPECTS(size >= 0, "size must be non-negative"); + } + + constexpr byte_range_info(byte_range_info const& other) noexcept = default; + constexpr byte_range_info& operator=(byte_range_info const& other) noexcept = default; + + [[nodiscard]] constexpr int64_t offset() { return _offset; } + [[nodiscard]] constexpr int64_t size() { return _size; } +}; + +/** + * @brief Create a collection of consecutive ranges between [0, total_bytes). + * + * Each range wil be the same size except if `total_bytes` is not evenly divisible by + * `range_count`, in which case the last range size will be the remainder. + * + * @param total_bytes total number of bytes in all ranges + * @param range_count total number of ranges in which to divide bytes + * @return Vector of range objects + */ +std::vector create_byte_range_infos_consecutive(int64_t total_bytes, + int64_t range_count); + +/** + * @brief Create a byte_range_info which represents as much of a file as possible. Specifically, + * `[0, numeric_limit::max())`. + * + * @return `[0, numeric_limit::max())` + */ +byte_range_info create_byte_range_info_max(); + +} // namespace text +} // namespace io +} // namespace cudf diff --git a/cpp/include/cudf/io/text/data_chunk_source.hpp b/cpp/include/cudf/io/text/data_chunk_source.hpp index 5e6dda5a514..3499b86ab42 100644 --- a/cpp/include/cudf/io/text/data_chunk_source.hpp +++ b/cpp/include/cudf/io/text/data_chunk_source.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,6 +36,7 @@ namespace text { */ class device_data_chunk { public: + virtual ~device_data_chunk() = default; [[nodiscard]] virtual char const* data() const = 0; [[nodiscard]] virtual std::size_t size() const = 0; virtual operator device_span() const = 0; @@ -52,6 +53,9 @@ class device_data_chunk { */ class data_chunk_reader { public: + virtual ~data_chunk_reader() = default; + virtual void skip_bytes(std::size_t size) = 0; + /** * @brief Get the next chunk of bytes from the data source * @@ -76,6 +80,7 @@ class data_chunk_reader { */ class data_chunk_source { public: + virtual ~data_chunk_source() = default; [[nodiscard]] virtual std::unique_ptr create_reader() const = 0; }; diff --git a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp index aeb4b7fff53..ffe159b59dc 100644 --- a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp +++ b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,6 +89,8 @@ class istream_data_chunk_reader : public data_chunk_reader { } } + void skip_bytes(std::size_t size) override { _datastream->ignore(size); }; + std::unique_ptr get_next_chunk(std::size_t read_size, rmm::cuda_stream_view stream) override { @@ -143,6 +145,12 @@ class device_span_data_chunk_reader : public data_chunk_reader { public: device_span_data_chunk_reader(device_span data) : _data(data) {} + void skip_bytes(std::size_t read_size) override + { + if (read_size > _data.size() - _position) { read_size = _data.size() - _position; } + _position += read_size; + }; + std::unique_ptr get_next_chunk(std::size_t read_size, rmm::cuda_stream_view stream) override { diff --git a/cpp/include/cudf/io/text/detail/trie.hpp b/cpp/include/cudf/io/text/detail/trie.hpp index 06d15276a68..a908a9fa227 100644 --- a/cpp/include/cudf/io/text/detail/trie.hpp +++ b/cpp/include/cudf/io/text/detail/trie.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,20 +89,6 @@ struct trie_device_view { */ constexpr uint8_t get_match_length(uint16_t idx) { return _nodes[idx].match_length; } - /** - * @brief returns the longest matching state of any state in the multistate. - */ - template - constexpr uint8_t get_match_length(multistate const& states) - { - int8_t val = 0; - for (uint8_t i = 0; i < states.size(); i++) { - auto match_length = get_match_length(states.get_tail(i)); - if (match_length > val) { val = match_length; } - } - return val; - } - private: constexpr void transition_enqueue_all( // char c, diff --git a/cpp/include/cudf/io/text/multibyte_split.hpp b/cpp/include/cudf/io/text/multibyte_split.hpp index d42ee9f510e..25f7ef98a81 100644 --- a/cpp/include/cudf/io/text/multibyte_split.hpp +++ b/cpp/include/cudf/io/text/multibyte_split.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
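The byte_range_info header introduced above documents create_byte_range_infos_consecutive as dividing [0, total_bytes) into equally sized consecutive ranges, with the final range picking up any remainder. A standalone sketch of one plausible reading of that splitting rule (plain C++; range_sketch is a stand-in for the cudf class):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct range_sketch {  // stand-in for cudf::io::text::byte_range_info
  int64_t offset;
  int64_t size;
};

// Divide [0, total_bytes) into range_count consecutive ranges: every range gets
// the rounded-up chunk size except the last, which takes whatever remains.
std::vector<range_sketch> split_consecutive(int64_t total_bytes, int64_t range_count)
{
  auto const chunk = (total_bytes + range_count - 1) / range_count;  // ceiling division
  std::vector<range_sketch> ranges;
  for (int64_t i = 0; i < range_count; ++i) {
    auto const offset = i * chunk;
    auto const size   = std::max<int64_t>(0, std::min(chunk, total_bytes - offset));
    ranges.push_back({offset, size});
  }
  return ranges;
}

int main()
{
  for (auto const& r : split_consecutive(20, 3)) {  // prints [0, 7) [7, 14) [14, 20)
    std::cout << "[" << r.offset << ", " << r.offset + r.size << ")\n";
  }
  return 0;
}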
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -27,10 +28,53 @@ namespace cudf { namespace io { namespace text { +/** + * @brief Splits the source text into a strings column using a multiple byte delimiter. + * + * Providing a byte range allows multibyte_split to read a whole file, but only return the offsets + * of delimiters which begin within the range. If thinking in terms of "records", where each + * delimiter dictates the end of a record, all records which begin within the byte range provided + * will be returned, including any record which may begin in the range but end outside of the + * range. Records which begin outside of the range will ignored, even if those records end inside + * the range. + * + * @code{.pseudo} + * Examples: + * source: "abc..def..ghi..jkl.." + * delimiter: ".." + * + * byte_range: nullopt + * return: ["abc..", "def..", "ghi..", jkl..", ""] + * + * byte_range: [0, 2) + * return: ["abc.."] + * + * byte_range: [2, 9) + * return: ["def..", "ghi.."] + * + * byte_range: [11, 2) + * return: [] + * + * byte_range: [13, 7) + * return: ["jkl..", ""] + * @endcode + * + * @param source The source string + * @param delimiter UTF-8 encoded string for which to find offsets in the source + * @param byte_range range in which to consider offsets relevant + * @param mr Memory resource to use for the device memory allocation + * @return The strings found by splitting the source by the delimiter within the relevant byte + * range. + */ std::unique_ptr multibyte_split( data_chunk_source const& source, std::string const& delimiter, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + std::optional byte_range = std::nullopt, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +std::unique_ptr multibyte_split(data_chunk_source const& source, + std::string const& delimiter, + rmm::mr::device_memory_resource* mr); } // namespace text } // namespace io diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 9ccd4d21682..1172a5a68cd 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -159,9 +159,9 @@ struct host_span : public cudf::detail::span_base, - void>::type* = nullptr> + std::enable_if_t<(Extent == OtherExtent || Extent == dynamic_extent) && + std::is_convertible_v, + void>* = nullptr> constexpr host_span(const host_span& other) noexcept : base(other.data(), other.size()) { @@ -220,9 +220,9 @@ struct device_span : public cudf::detail::span_base, - void>::type* = nullptr> + std::enable_if_t<(Extent == OtherExtent || Extent == dynamic_extent) && + std::is_convertible_v, + void>* = nullptr> constexpr device_span(const device_span& other) noexcept : base(other.data(), other.size()) { @@ -283,9 +283,9 @@ class base_2dspan { template typename OtherRowType, - typename std::enable_if, - RowType>, - void>::type* = nullptr> + std::enable_if_t, + RowType>, + void>* = nullptr> constexpr base_2dspan(base_2dspan const& other) noexcept : _data{other.data()}, _size{other.size()} { diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index f1ad11a9030..504ec6de405 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -676,13 +676,13 @@ constexpr inline bool is_nested(data_type type) template struct is_bit_castable_to_impl 
{ - template ()>* = nullptr> + template ()>* = nullptr> constexpr bool operator()() { return false; } - template ()>* = nullptr> + template ()>* = nullptr> constexpr bool operator()() { if (not cuda::std::is_trivially_copyable_v || @@ -696,13 +696,13 @@ struct is_bit_castable_to_impl { }; struct is_bit_castable_from_impl { - template ()>* = nullptr> + template ()>* = nullptr> constexpr bool operator()(data_type) { return false; } - template ()>* = nullptr> + template ()>* = nullptr> constexpr bool operator()(data_type to) { return cudf::type_dispatcher(to, is_bit_castable_to_impl{}); diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index aa77686fee4..cd96748f081 100644 --- a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -178,7 +178,7 @@ bool validate_host_masks(std::vector const& expected_mask, * @return std::pair, std::vector> first is the * `column_view`'s data, and second is the column's bitmask. */ -template ()>* = nullptr> +template ()>* = nullptr> std::pair, std::vector> to_host(column_view c) { thrust::host_vector host_data(c.size()); @@ -197,7 +197,7 @@ std::pair, std::vector> to_host(column_view * @return std::pair, std::vector> first is the * `column_view`'s data, and second is the column's bitmask. */ -template ()>* = nullptr> +template ()>* = nullptr> std::pair, std::vector> to_host(column_view c) { using namespace numeric; diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index c190105e292..4005a4f9adc 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -93,31 +93,31 @@ class column_wrapper { template struct fixed_width_type_converter { // Are the types same - simply copy elements from [begin, end) to out - template , void>::type* = nullptr> + template , void>* = nullptr> constexpr ToT operator()(FromT element) const { return element; } // Are the types convertible or can target be constructed from source? 
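The is_bit_castable checks above allow a bit_cast between two fixed-width types only when both are trivially copyable and occupy the same number of bytes. A host-only mirror of that rule, with std:: traits standing in for the cuda::std:: ones used in the header:

#include <cstdint>
#include <cstring>
#include <type_traits>

// Two fixed-width types may be bit-cast only when both are trivially copyable
// and have the same size.
template <typename From, typename To>
constexpr bool is_bit_castable_v =
  std::is_trivially_copyable_v<From> && std::is_trivially_copyable_v<To> &&
  sizeof(From) == sizeof(To);

static_assert(is_bit_castable_v<int32_t, float>);    // 4 bytes <-> 4 bytes
static_assert(!is_bit_castable_v<int32_t, double>);  // sizes differ

int main()
{
  // the reinterpretation a bit_cast performs, spelled with memcpy
  float const f = 1.5f;
  int32_t bits{};
  std::memcpy(&bits, &f, sizeof(bits));
  return bits == 0x3FC00000 ? 0 : 1;
}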
- template && - (cudf::is_convertible::value || - std::is_constructible_v), - void>::type* = nullptr> + template < + typename FromT = From, + typename ToT = To, + std::enable_if_t && (cudf::is_convertible::value || + std::is_constructible_v), + void>* = nullptr> constexpr ToT operator()(FromT element) const { return static_cast(element); } // Convert integral values to timestamps - template && cudf::is_timestamp(), - void>::type* = nullptr> + template < + typename FromT = From, + typename ToT = To, + std::enable_if_t && cudf::is_timestamp(), void>* = nullptr> constexpr ToT operator()(FromT element) const { return ToT{typename ToT::duration{element}}; @@ -137,7 +137,7 @@ struct fixed_width_type_converter { template ()>* = nullptr> + std::enable_if_t()>* = nullptr> rmm::device_buffer make_elements(InputIterator begin, InputIterator end) { static_assert(cudf::is_fixed_width(), "Unexpected non-fixed width type."); @@ -162,8 +162,8 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end) template () and - cudf::is_fixed_point()>* = nullptr> + std::enable_if_t() and + cudf::is_fixed_point()>* = nullptr> rmm::device_buffer make_elements(InputIterator begin, InputIterator end) { using RepType = typename ElementTo::rep; @@ -187,8 +187,8 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end) template () and - cudf::is_fixed_point()>* = nullptr> + std::enable_if_t() and + cudf::is_fixed_point()>* = nullptr> rmm::device_buffer make_elements(InputIterator begin, InputIterator end) { using namespace numeric; diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp index e84417c91d6..ac2892a0f34 100644 --- a/cpp/include/cudf_test/type_lists.hpp +++ b/cpp/include/cudf_test/type_lists.hpp @@ -80,9 +80,8 @@ constexpr auto types_to_ids() * @return Vector of TypeParam with the values specified */ template -typename std::enable_if() && - !cudf::is_timestamp_t::value, - thrust::host_vector>::type +std::enable_if_t() && !cudf::is_timestamp_t::value, + thrust::host_vector> make_type_param_vector(std::initializer_list const& init_list) { thrust::host_vector vec(init_list.size()); @@ -100,8 +99,7 @@ make_type_param_vector(std::initializer_list const& init_list) * @return Vector of TypeParam with the values specified */ template -typename std::enable_if::value, - thrust::host_vector>::type +std::enable_if_t::value, thrust::host_vector> make_type_param_vector(std::initializer_list const& init_list) { thrust::host_vector vec(init_list.size()); @@ -119,8 +117,7 @@ make_type_param_vector(std::initializer_list const& init_list) */ template -typename std::enable_if, - thrust::host_vector>::type +std::enable_if_t, thrust::host_vector> make_type_param_vector(std::initializer_list const& init_list) { thrust::host_vector vec(init_list.size()); diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu index 995c6702cf8..c4538379836 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cu +++ b/cpp/src/binaryop/compiled/binary_ops.cu @@ -119,9 +119,9 @@ struct compare_functor { // This is used to compare a scalar and a column value template - __device__ inline typename std::enable_if_t && - !std::is_same_v, - OutT> + __device__ inline std::enable_if_t && + !std::is_same_v, + OutT> operator()(cudf::size_type i) const { return cfunc_(lhs_dev_view_.is_valid(i), @@ -133,9 +133,9 @@ struct compare_functor { // This is used to compare a scalar and a column value template - __device__ inline typename std::enable_if_t && - 
std::is_same_v, - OutT> + __device__ inline std::enable_if_t && + std::is_same_v, + OutT> operator()(cudf::size_type i) const { return cfunc_(lhs_dev_view_.is_valid(), @@ -147,9 +147,9 @@ struct compare_functor { // This is used to compare 2 column values template - __device__ inline typename std::enable_if_t && - std::is_same_v, - OutT> + __device__ inline std::enable_if_t && + std::is_same_v, + OutT> operator()(cudf::size_type i) const { return cfunc_(lhs_dev_view_.is_valid(i), diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index fefe0b3c862..118a08ab26d 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,21 +31,20 @@ namespace cudf { namespace { struct size_of_helper { cudf::data_type type; - template ()>* = nullptr> + template ()>* = nullptr> constexpr int operator()() const { CUDF_FAIL("Invalid, non fixed-width element type."); return 0; } - template () && not is_fixed_point()>* = nullptr> + template () && not is_fixed_point()>* = nullptr> constexpr int operator()() const noexcept { return sizeof(T); } - template ()>* = nullptr> + template ()>* = nullptr> constexpr int operator()() const noexcept { // Only want the sizeof fixed_point::Rep as fixed_point::scale is stored in data_type diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 3412733f0b2..82e189b5a36 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -166,7 +166,7 @@ __global__ void fused_concatenate_kernel(column_device_view const* input_views, auto const output_size = output_view.size(); auto* output_data = output_view.data(); - size_type output_index = threadIdx.x + blockIdx.x * blockDim.x; + int64_t output_index = threadIdx.x + blockIdx.x * blockDim.x; size_type warp_valid_count = 0; unsigned active_mask; @@ -222,7 +222,7 @@ std::unique_ptr fused_concatenate(host_span views, auto const& d_offsets = std::get<2>(device_views); auto const output_size = std::get<3>(device_views); - CUDF_EXPECTS(output_size < static_cast(std::numeric_limits::max()), + CUDF_EXPECTS(output_size <= static_cast(std::numeric_limits::max()), "Total number of concatenated rows exceeds size_type range"); // Allocate output diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 122ad4a9752..4dbe9faaa47 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
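The concatenate.cu hunk above widens the kernel's running output index from size_type to int64_t and relaxes the size check so an output of exactly numeric_limits<size_type>::max() rows is accepted. The reason is that a grid-stride loop keeps adding the stride to the flat thread index, so the running value can pass the 32-bit limit even though every valid row still fits. A short host-side illustration of that reasoning (values are made up):

#include <cstdint>
#include <iostream>
#include <limits>

int main()
{
  int64_t const output_size = std::numeric_limits<int32_t>::max();  // now permitted by <=
  int64_t const stride      = int64_t{256} * 1024;                  // blockDim.x * gridDim.x

  // a thread whose flat index lands near the end of the output
  int64_t index = output_size - 10;
  index += stride;  // fine in 64 bits: the loop simply terminates

  // the same bump on a 32-bit index would overflow (undefined behaviour),
  // which is why the kernel's index variable was widened to int64_t
  int32_t const narrow = static_cast<int32_t>(output_size - 10);

  std::cout << narrow << " near INT32_MAX; 64-bit index exits cleanly: " << (index > output_size)
            << '\n';
  return 0;
}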
@@ -278,14 +278,14 @@ struct launch_functor { launch_functor(column_view inp, mutable_column_view out) : input(inp), output(out) {} template - typename std::enable_if_t::value, void> operator()( + std::enable_if_t::value, void> operator()( rmm::cuda_stream_view stream) const { CUDF_FAIL("Cannot extract datetime component from non-timestamp column."); } template - typename std::enable_if_t::value, void> operator()( + std::enable_if_t::value, void> operator()( rmm::cuda_stream_view stream) const { thrust::transform(rmm::exec_policy(stream), @@ -326,18 +326,18 @@ std::unique_ptr apply_datetime_op(column_view const& column, struct add_calendrical_months_functor { template - typename std::enable_if_t::value, std::unique_ptr> - operator()(Args&&...) const + std::enable_if_t::value, std::unique_ptr> operator()( + Args&&...) const { CUDF_FAIL("Cannot extract datetime component from non-timestamp column."); } template - typename std::enable_if_t::value, std::unique_ptr> - operator()(column_view timestamp_column, - MonthIterator months_begin, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + std::enable_if_t::value, std::unique_ptr> operator()( + column_view timestamp_column, + MonthIterator months_begin, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { auto size = timestamp_column.size(); auto output_col_type = timestamp_column.type(); diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index 301338fa1a8..871a36f7d62 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -122,8 +122,7 @@ struct compute_children_offsets_fn { */ struct dispatch_compute_indices { template - typename std::enable_if_t(), - std::unique_ptr> + std::enable_if_t(), std::unique_ptr> operator()(column_view const& all_keys, column_view const& all_indices, column_view const& new_keys, @@ -184,8 +183,7 @@ struct dispatch_compute_indices { } template - typename std::enable_if_t(), - std::unique_ptr> + std::enable_if_t(), std::unique_ptr> operator()(Args&&...) { CUDF_FAIL("dictionary concatenate not supported for this column type"); diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index c1fb1fa2180..7783e5f8daf 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -50,8 +50,7 @@ namespace { */ struct dispatch_compute_indices { template - typename std::enable_if_t(), - std::unique_ptr> + std::enable_if_t(), std::unique_ptr> operator()(dictionary_column_view const& input, column_view const& new_keys, rmm::cuda_stream_view stream, @@ -100,8 +99,7 @@ struct dispatch_compute_indices { } template - typename std::enable_if_t(), - std::unique_ptr> + std::enable_if_t(), std::unique_ptr> operator()(Args&&...) { CUDF_FAIL("dictionary set_keys not supported for this column type"); diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index c49142f91f9..e5bffcf21c1 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -55,9 +55,8 @@ struct const_tabulator { * by init and step. 
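The sequence functor whose documentation continues below fills a column with init, init + step, init + 2*step, ... (and init + i when no step is given). A trivial host-side sketch of that tabulation, assuming a numeric element type:

#include <cstddef>
#include <vector>

// Fill a vector with init + i * step, mirroring what the device-side
// tabulating iterator computes per row.
template <typename T>
std::vector<T> make_sequence(std::size_t size, T init, T step)
{
  std::vector<T> out(size);
  for (std::size_t i = 0; i < size; ++i) { out[i] = init + static_cast<T>(i) * step; }
  return out;
}

int main()
{
  auto const seq = make_sequence<int>(5, 10, 3);  // 10, 13, 16, 19, 22
  return seq[4] == 22 ? 0 : 1;
}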
*/ struct sequence_functor { - template < - typename T, - typename std::enable_if_t() and not cudf::is_boolean()>* = nullptr> + template () and not cudf::is_boolean()>* = nullptr> std::unique_ptr operator()(size_type size, scalar const& init, scalar const& step, @@ -83,9 +82,8 @@ struct sequence_functor { return result; } - template < - typename T, - typename std::enable_if_t() and not cudf::is_boolean()>* = nullptr> + template () and not cudf::is_boolean()>* = nullptr> std::unique_ptr operator()(size_type size, scalar const& init, rmm::cuda_stream_view stream, diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index 5154c867095..37d13d5aea3 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,14 +33,14 @@ namespace detail { namespace { struct nunique_functor { template - typename std::enable_if_t(), std::unique_ptr> - operator()(column_view const& values, - cudf::device_span group_labels, - size_type const num_groups, - cudf::device_span group_offsets, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + std::enable_if_t(), std::unique_ptr> operator()( + column_view const& values, + cudf::device_span group_labels, + size_type const num_groups, + cudf::device_span group_offsets, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto result = make_numeric_column( data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); @@ -94,8 +94,8 @@ struct nunique_functor { } template - typename std::enable_if_t(), std::unique_ptr> - operator()(Args&&...) + std::enable_if_t(), std::unique_ptr> operator()( + Args&&...) { CUDF_FAIL("list_view group_nunique not supported yet"); } diff --git a/cpp/src/groupby/sort/group_tdigest.cu b/cpp/src/groupby/sort/group_tdigest.cu index f48ab852f24..f726de9bf3c 100644 --- a/cpp/src/groupby/sort/group_tdigest.cu +++ b/cpp/src/groupby/sort/group_tdigest.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -707,9 +707,8 @@ struct get_scalar_minmax { }; struct typed_group_tdigest { - template < - typename T, - typename std::enable_if_t() || cudf::is_fixed_point()>* = nullptr> + template () || cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& col, cudf::device_span group_offsets, cudf::device_span group_labels, @@ -766,10 +765,9 @@ struct typed_group_tdigest { mr); } - template < - typename T, - typename... Args, - typename std::enable_if_t() && !cudf::is_fixed_point()>* = nullptr> + template () && !cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(Args&&...) { CUDF_FAIL("Non-numeric type in group_tdigest"); diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 13f5a57ac1f..e2e478af9ef 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -375,8 +375,8 @@ struct decode_op { * @return bool Whether the parsed value is valid. */ template and !std::is_same_v and - !cudf::is_fixed_point()>* = nullptr> + std::enable_if_t and !std::is_same_v and + !cudf::is_fixed_point()>* = nullptr> __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, @@ -402,7 +402,7 @@ struct decode_op { * * @return bool Whether the parsed value is valid. */ - template ()>* = nullptr> + template ()>* = nullptr> __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type output_type, @@ -423,7 +423,7 @@ struct decode_op { /** * @brief Dispatch for boolean type types. */ - template >* = nullptr> + template >* = nullptr> __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, @@ -447,7 +447,7 @@ struct decode_op { * @brief Dispatch for floating points, which are set to NaN if the input * is not valid. In such case, the validity mask is set to zero too. */ - template >* = nullptr> + template >* = nullptr> __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, @@ -466,8 +466,8 @@ struct decode_op { * @brief Dispatch for all other types. */ template and !std::is_floating_point_v and - !cudf::is_fixed_point()>* = nullptr> + std::enable_if_t and !std::is_floating_point_v and + !cudf::is_fixed_point()>* = nullptr> __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 5cf0b03a6f1..21455e3ab93 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -216,7 +216,7 @@ struct ConvertFunctor { * It is handled here rather than within convertStrToValue() as that function * is used by other types (ex. timestamp) that aren't 'booleable'. */ - template >* = nullptr> + template >* = nullptr> __host__ __device__ __forceinline__ bool operator()(char const* begin, char const* end, void* output_column, @@ -240,7 +240,7 @@ struct ConvertFunctor { * @brief Dispatch for floating points, which are set to NaN if the input * is not valid. In such case, the validity mask is set to zero too. */ - template >* = nullptr> + template >* = nullptr> __host__ __device__ __forceinline__ bool operator()(char const* begin, char const* end, void* out_buffer, @@ -257,9 +257,8 @@ struct ConvertFunctor { * @brief Default template operator() dispatch specialization all data types * (including wrapper types) that is not covered by above. 
*/ - template < - typename T, - typename std::enable_if_t and !std::is_integral_v>* = nullptr> + template and !std::is_integral_v>* = nullptr> __host__ __device__ __forceinline__ bool operator()(char const* begin, char const* end, void* output_column, diff --git a/cpp/src/io/orc/orc.h b/cpp/src/io/orc/orc.h index 386e3d8d73a..47020023419 100644 --- a/cpp/src/io/orc/orc.h +++ b/cpp/src/io/orc/orc.h @@ -137,56 +137,51 @@ int inline constexpr encode_field_number(int field_number, ProtofType field_type } namespace { -template < - typename base_t, - typename std::enable_if_t and !std::is_enum_v>* = nullptr> +template and !std::is_enum_v>* = nullptr> int static constexpr encode_field_number_base(int field_number) noexcept { return encode_field_number(field_number, ProtofType::FIXEDLEN); } -template < - typename base_t, - typename std::enable_if_t or std::is_enum_v>* = nullptr> +template or std::is_enum_v>* = nullptr> int static constexpr encode_field_number_base(int field_number) noexcept { return encode_field_number(field_number, ProtofType::VARINT); } -template >* = nullptr> +template >* = nullptr> int static constexpr encode_field_number_base(int field_number) noexcept { return encode_field_number(field_number, ProtofType::FIXED32); } -template >* = nullptr> +template >* = nullptr> int static constexpr encode_field_number_base(int field_number) noexcept { return encode_field_number(field_number, ProtofType::FIXED64); } }; // namespace -template < - typename T, - typename std::enable_if_t or std::is_same_v>* = nullptr> +template or std::is_same_v>* = nullptr> int constexpr encode_field_number(int field_number) noexcept { return encode_field_number_base(field_number); } // containters change the field number encoding -template < - typename T, - typename std::enable_if_t>>* = nullptr> +template >>* = nullptr> int constexpr encode_field_number(int field_number) noexcept { return encode_field_number_base(field_number); } // optional fields don't change the field number encoding -template < - typename T, - typename std::enable_if_t>>* = nullptr> +template >>* = nullptr> int constexpr encode_field_number(int field_number) noexcept { return encode_field_number_base(field_number); @@ -244,19 +239,19 @@ class ProtobufReader { uint32_t read_field_size(const uint8_t* end); - template >* = nullptr> + template >* = nullptr> void read_field(T& value, const uint8_t* end) { value = get(); } - template >* = nullptr> + template >* = nullptr> void read_field(T& value, const uint8_t* end) { value = static_cast(get()); } - template >* = nullptr> + template >* = nullptr> void read_field(T& value, const uint8_t* end) { auto const size = read_field_size(end); @@ -264,8 +259,7 @@ class ProtobufReader { m_cur += size; } - template >>* = nullptr> + template >>* = nullptr> void read_field(T& value, const uint8_t* end) { auto const size = read_field_size(end); @@ -273,10 +267,9 @@ class ProtobufReader { m_cur += size; } - template < - typename T, - typename std::enable_if_t> and - !std::is_same_v>* = nullptr> + template > and + !std::is_same_v>* = nullptr> void read_field(T& value, const uint8_t* end) { auto const size = read_field_size(end); @@ -284,9 +277,8 @@ class ProtobufReader { read(value.back(), size); } - template < - typename T, - typename std::enable_if_t>>* = nullptr> + template >>* = nullptr> void read_field(T& value, const uint8_t* end) { typename T::value_type contained_value; @@ -301,7 +293,7 @@ class ProtobufReader { read(value, size); } - template >* = nullptr> + template >* = nullptr> void read_field(T& 
value, const uint8_t* end) { memcpy(&value, m_cur, sizeof(T)); diff --git a/cpp/src/io/text/byte_range_info.cpp b/cpp/src/io/text/byte_range_info.cpp new file mode 100644 index 00000000000..290e0451839 --- /dev/null +++ b/cpp/src/io/text/byte_range_info.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +namespace cudf { +namespace io { +namespace text { + +byte_range_info create_byte_range_info_max() { return {0, std::numeric_limits::max()}; } + +std::vector create_byte_range_infos_consecutive(int64_t total_bytes, + int64_t range_count) +{ + auto range_size = util::div_rounding_up_safe(total_bytes, range_count); + auto ranges = std::vector(); + + ranges.reserve(range_size); + + for (int64_t i = 0; i < range_count; i++) { + auto offset = i * range_size; + auto size = std::min(range_size, total_bytes - offset); + ranges.emplace_back(offset, size); + } + + return ranges; +} + +} // namespace text +} // namespace io +} // namespace cudf diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index d287b9f2419..99f3bde3bf6 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
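// [editor's note — illustrative sketch, not part of this patch] The new
// create_byte_range_infos_consecutive() above splits total_bytes into range_count
// consecutive (offset, size) ranges using a ceiling division for the range size. The
// standalone program below mirrors that splitting logic with plain integers so it can be
// compiled without cudf; std::pair stands in for byte_range_info, and the helper name
// split_consecutive is invented for illustration only.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

std::vector<std::pair<std::int64_t, std::int64_t>> split_consecutive(std::int64_t total_bytes,
                                                                     std::int64_t range_count)
{
  // ceiling division, standing in for cudf::util::div_rounding_up_safe used in the patch
  auto const range_size = (total_bytes + range_count - 1) / range_count;

  std::vector<std::pair<std::int64_t, std::int64_t>> ranges;
  // note: range_count elements are pushed (the patch reserves range_size instead)
  ranges.reserve(range_count);

  for (std::int64_t i = 0; i < range_count; i++) {
    auto const offset = i * range_size;
    // the final range is shortened so the ranges cover exactly total_bytes
    ranges.emplace_back(offset, std::min(range_size, total_bytes - offset));
  }
  return ranges;
}

int main()
{
  // 100 bytes split three ways -> (0,34) (34,34) (68,32); only the last range is short
  for (auto const& [offset, size] : split_consecutive(100, 3)) {
    std::cout << "offset=" << offset << " size=" << size << '\n';
  }
}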
@@ -18,16 +18,24 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include +#include #include +#include +#include +#include +#include + #include #include @@ -96,7 +104,7 @@ __global__ void multibyte_split_init_kernel( cudf::size_type base_tile_idx, cudf::size_type num_tiles, cudf::io::text::detail::scan_tile_state_view tile_multistates, - cudf::io::text::detail::scan_tile_state_view tile_output_offsets, + cudf::io::text::detail::scan_tile_state_view tile_output_offsets, cudf::io::text::detail::scan_tile_status status = cudf::io::text::detail::scan_tile_status::invalid) { @@ -110,7 +118,7 @@ __global__ void multibyte_split_init_kernel( __global__ void multibyte_split_seed_kernel( cudf::io::text::detail::scan_tile_state_view tile_multistates, - cudf::io::text::detail::scan_tile_state_view tile_output_offsets, + cudf::io::text::detail::scan_tile_state_view tile_output_offsets, multistate tile_multistate_seed, uint32_t tile_output_offset) { @@ -124,17 +132,15 @@ __global__ void multibyte_split_seed_kernel( __global__ void multibyte_split_kernel( cudf::size_type base_tile_idx, cudf::io::text::detail::scan_tile_state_view tile_multistates, - cudf::io::text::detail::scan_tile_state_view tile_output_offsets, + cudf::io::text::detail::scan_tile_state_view tile_output_offsets, cudf::io::text::detail::trie_device_view trie, - int32_t chunk_input_offset, cudf::device_span chunk_input_chars, - cudf::device_span abs_output_delimiter_offsets, - cudf::device_span abs_output_chars) + cudf::device_span abs_output_delimiter_offsets) { using InputLoad = cub::BlockLoad; - using OffsetScan = cub::BlockScan; - using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback; + using OffsetScan = cub::BlockScan; + using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback; __shared__ union { typename InputLoad::TempStorage input_load; @@ -166,7 +172,7 @@ __global__ void multibyte_split_kernel( // STEP 3: Flag matches - uint32_t thread_offsets[ITEMS_PER_THREAD]; + int64_t thread_offsets[ITEMS_PER_THREAD]; for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) { thread_offsets[i] = i < thread_input_size and trie.is_match(thread_states[i]); @@ -182,16 +188,11 @@ __global__ void multibyte_split_kernel( // Step 5: Assign outputs from each thread using match offsets. 
- if (abs_output_chars.size() > 0) { - for (int32_t i = 0; i < ITEMS_PER_THREAD and i < thread_input_size; i++) { - abs_output_chars[chunk_input_offset + thread_input_offset + i] = thread_chars[i]; - } - } - if (abs_output_delimiter_offsets.size() > 0) { for (int32_t i = 0; i < ITEMS_PER_THREAD and i < thread_input_size; i++) { if (trie.is_match(thread_states[i])) { - auto const match_end = base_tile_idx * ITEMS_PER_TILE + thread_input_offset + i + 1; + auto const match_end = + static_cast(base_tile_idx) * ITEMS_PER_TILE + thread_input_offset + i + 1; abs_output_delimiter_offsets[thread_offsets[i]] = match_end; } } @@ -236,17 +237,16 @@ std::vector get_streams(int32_t count, rmm::cuda_stream_p return streams; } -cudf::size_type multibyte_split_scan_full_source(cudf::io::text::data_chunk_source const& source, - cudf::io::text::detail::trie const& trie, - scan_tile_state& tile_multistates, - scan_tile_state& tile_offsets, - device_span output_buffer, - device_span output_char_buffer, - rmm::cuda_stream_view stream, - std::vector const& streams) +int64_t multibyte_split_scan_full_source(cudf::io::text::data_chunk_source const& source, + cudf::io::text::detail::trie const& trie, + scan_tile_state& tile_multistates, + scan_tile_state& tile_offsets, + device_span output_buffer, + rmm::cuda_stream_view stream, + std::vector const& streams) { CUDF_FUNC_RANGE(); - cudf::size_type chunk_offset = 0; + int64_t chunk_offset = 0; multibyte_split_init_kernel<<>>( // -TILES_PER_CHUNK, @@ -298,14 +298,14 @@ cudf::size_type multibyte_split_scan_full_source(cudf::io::text::data_chunk_sour tile_multistates, tile_offsets, trie.view(), - chunk_offset, *chunk, - output_buffer, - output_char_buffer); + output_buffer); cudaEventRecord(last_launch_event, chunk_stream); chunk_offset += chunk->size(); + + chunk.reset(); } cudaEventDestroy(last_launch_event); @@ -317,6 +317,7 @@ cudf::size_type multibyte_split_scan_full_source(cudf::io::text::data_chunk_sour std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, std::string const& delimiter, + byte_range_info byte_range, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr, rmm::cuda_stream_pool& stream_pool) @@ -336,7 +337,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source // best when at least 32 more than max possible concurrent tiles, due to rolling `invalid`s auto num_tile_states = std::max(32, TILES_PER_CHUNK * concurrency + 32); auto tile_multistates = scan_tile_state(num_tile_states, stream); - auto tile_offsets = scan_tile_state(num_tile_states, stream); + auto tile_offsets = scan_tile_state(num_tile_states, stream); auto streams = get_streams(concurrency, stream_pool); @@ -345,52 +346,104 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source trie, tile_multistates, tile_offsets, - cudf::device_span(static_cast(nullptr), 0), - cudf::device_span(static_cast(nullptr), 0), + cudf::device_span(static_cast(nullptr), 0), stream, streams); // allocate results - auto num_tiles = cudf::util::div_rounding_up_safe(bytes_total, ITEMS_PER_TILE); - auto num_results = tile_offsets.get_inclusive_prefix(num_tiles - 1, stream); - auto string_offsets = rmm::device_uvector(num_results + 2, stream, mr); - auto string_chars = rmm::device_uvector(bytes_total, stream, mr); + auto num_tiles = + cudf::util::div_rounding_up_safe(bytes_total, static_cast(ITEMS_PER_TILE)); + auto num_results = tile_offsets.get_inclusive_prefix(num_tiles - 1, stream); + + auto string_offsets = rmm::device_uvector(num_results + 2, 
stream); // first and last element are set manually to zero and size of input, respectively. // kernel is only responsible for determining delimiter offsets - auto string_count = static_cast(string_offsets.size() - 1); string_offsets.set_element_to_zero_async(0, stream); - string_offsets.set_element_async(string_count, bytes_total, stream); + string_offsets.set_element_async(string_offsets.size() - 1, bytes_total, stream); + + // kernel needs to find first and last relevant offset., as well as count of relevant offsets. multibyte_split_scan_full_source( source, trie, tile_multistates, tile_offsets, - cudf::device_span(string_offsets).subspan(1, num_results), - string_chars, + cudf::device_span(string_offsets).subspan(1, num_results), stream, streams); + auto relevant_offsets_begin = thrust::lower_bound(rmm::exec_policy(stream), + string_offsets.begin(), + string_offsets.end() - 1, + byte_range.offset()); + + auto relevant_offsets_end = thrust::upper_bound(rmm::exec_policy(stream), + string_offsets.begin(), + string_offsets.end() - 1, + byte_range.offset() + byte_range.size()) + + 1; + + auto string_offsets_out_size = relevant_offsets_end - relevant_offsets_begin; + + auto string_offsets_out = rmm::device_uvector(string_offsets_out_size, stream, mr); + + auto relevant_offset_first = + string_offsets.element(relevant_offsets_begin - string_offsets.begin(), stream); + auto relevant_offset_last = + string_offsets.element(relevant_offsets_end - string_offsets.begin() - 1, stream); + + auto string_chars_size = relevant_offset_last - relevant_offset_first; + auto string_chars = rmm::device_uvector(string_chars_size, stream, mr); + + // copy relevant offsets and adjust them to be zero-based. + thrust::transform(rmm::exec_policy(stream), + relevant_offsets_begin, + relevant_offsets_end, + string_offsets_out.begin(), + [relevant_offset_first] __device__(int64_t offset) { + return static_cast(offset - relevant_offset_first); + }); + + auto reader = source.create_reader(); + reader->skip_bytes(relevant_offset_first); + + auto relevant_bytes = reader->get_next_chunk(string_chars_size, stream); + + thrust::copy(rmm::exec_policy(stream), + relevant_bytes->data(), // + relevant_bytes->data() + relevant_bytes->size(), + string_chars.begin()); + + auto string_count = string_offsets_out.size() - 1; + return cudf::make_strings_column( - string_count, std::move(string_offsets), std::move(string_chars)); + string_count, std::move(string_offsets_out), std::move(string_chars)); } } // namespace detail std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, std::string const& delimiter, + std::optional byte_range, rmm::mr::device_memory_resource* mr) { auto stream = rmm::cuda_stream_default; auto stream_pool = rmm::cuda_stream_pool(2); - auto result = detail::multibyte_split(source, delimiter, stream, mr, stream_pool); - stream.synchronize(); + auto result = detail::multibyte_split( + source, delimiter, byte_range.value_or(create_byte_range_info_max()), stream, mr, stream_pool); return result; } +std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, + std::string const& delimiter, + rmm::mr::device_memory_resource* mr) +{ + return multibyte_split(source, delimiter, std::nullopt, mr); +} + } // namespace text } // namespace io } // namespace cudf diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh index d1b2e2862c6..74b98eff010 100644 --- a/cpp/src/io/utilities/parsing_utils.cuh +++ b/cpp/src/io/utilities/parsing_utils.cuh 
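// [editor's note — illustrative sketch, not part of this patch] Rough illustration of how
// the new optional byte_range parameter of multibyte_split is meant to be used: each call
// returns only the records whose terminating delimiter falls inside its byte range, so N
// calls over consecutive ranges cover a file exactly once. make_source_from_file and the
// defaulted memory-resource argument are assumed from the public text-I/O headers touched
// by this patch and may differ slightly in this version.
#include <cudf/column/column.hpp>
#include <cudf/io/text/byte_range_info.hpp>
#include <cudf/io/text/data_chunk_source_factories.hpp>
#include <cudf/io/text/multibyte_split.hpp>

#include <cstdint>
#include <filesystem>
#include <memory>
#include <string>
#include <vector>

std::vector<std::unique_ptr<cudf::column>> split_file_by_ranges(std::string const& path,
                                                                std::int64_t num_ranges)
{
  auto const source = cudf::io::text::make_source_from_file(path);
  auto const total  = static_cast<std::int64_t>(std::filesystem::file_size(path));

  std::vector<std::unique_ptr<cudf::column>> parts;
  for (auto const& range : cudf::io::text::create_byte_range_infos_consecutive(total, num_ranges)) {
    // passing std::nullopt (or using the overload without a byte_range) keeps the old
    // whole-file behaviour
    parts.push_back(cudf::io::text::multibyte_split(*source, "\n", range));
  }
  return parts;
}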
@@ -98,7 +98,7 @@ struct parse_options { * * @return uint8_t Numeric value of the character, or `0` */ -template >* = nullptr> +template >* = nullptr> constexpr uint8_t decode_digit(char c, bool* valid_flag) { if (c >= '0' && c <= '9') return c - '0'; @@ -119,7 +119,7 @@ constexpr uint8_t decode_digit(char c, bool* valid_flag) * * @return uint8_t Numeric value of the character, or `0` */ -template >* = nullptr> +template >* = nullptr> constexpr uint8_t decode_digit(char c, bool* valid_flag) { if (c >= '0' && c <= '9') return c - '0'; diff --git a/cpp/src/reductions/scan/scan_exclusive.cu b/cpp/src/reductions/scan/scan_exclusive.cu index bf9b06a3602..9811a986224 100644 --- a/cpp/src/reductions/scan/scan_exclusive.cu +++ b/cpp/src/reductions/scan/scan_exclusive.cu @@ -50,7 +50,7 @@ struct scan_dispatcher { * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column with scan results */ - template >* = nullptr> + template >* = nullptr> std::unique_ptr operator()(column_view const& input, null_policy, rmm::cuda_stream_view stream, diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index bf2c83b5b8d..ee865f09f2e 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -225,7 +225,7 @@ struct scan_dispatcher { * * @tparam T type of input column */ - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(column_view const& input, null_policy, rmm::cuda_stream_view stream, diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index 7dc8e6cb2c4..e5303246452 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -260,8 +260,7 @@ struct same_element_type_dispatcher { return !(cudf::is_dictionary() || std::is_same_v); } - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr resolve_key(column_view const& keys, scalar const& keys_index, rmm::cuda_stream_view stream, @@ -271,8 +270,7 @@ struct same_element_type_dispatcher { return cudf::detail::get_element(keys, index.value(stream), stream, mr); } - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr resolve_key(column_view const&, scalar const&, rmm::cuda_stream_view, @@ -353,7 +351,7 @@ struct element_type_dispatcher { * @brief Specialization for reducing floating-point column types to any output type. */ template >* = nullptr> + std::enable_if_t>* = nullptr> std::unique_ptr reduce_numeric(column_view const& col, data_type const output_type, rmm::cuda_stream_view stream, @@ -375,8 +373,7 @@ struct element_type_dispatcher { /** * @brief Specialization for reducing integer column types to any output type. */ - template >* = nullptr> + template >* = nullptr> std::unique_ptr reduce_numeric(column_view const& col, data_type const output_type, rmm::cuda_stream_view stream, @@ -405,8 +402,7 @@ struct element_type_dispatcher { * @param stream CUDA stream used for device memory operations and kernel launches. 
* @param mr Device memory resource used to allocate the returned scalar's device memory */ - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, data_type const output_type, rmm::cuda_stream_view stream, @@ -423,8 +419,7 @@ struct element_type_dispatcher { /** * @brief Specialization for reducing fixed_point column types to fixed_point number */ - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, data_type const output_type, rmm::cuda_stream_view stream, @@ -436,8 +431,8 @@ struct element_type_dispatcher { } template () and - not cudf::is_fixed_point()>* = nullptr> + std::enable_if_t() and + not cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const&, data_type const, rmm::cuda_stream_view, diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 93bc6cf5ae5..d41bdb6ca5a 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -297,8 +297,7 @@ struct replace_nulls_functor { * `replace_nulls` with the appropriate data types. */ struct replace_nulls_scalar_kernel_forwarder { - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, cudf::scalar const& replacement, rmm::cuda_stream_view stream, diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 0e3ead3fd99..cd66cad392e 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,7 +53,7 @@ struct interleave_columns_functor { }; template -struct interleave_columns_impl>> { +struct interleave_columns_impl>> { std::unique_ptr operator()(table_view const& lists_columns, bool create_mask, rmm::cuda_stream_view stream, @@ -64,7 +64,7 @@ struct interleave_columns_impl -struct interleave_columns_impl>> { +struct interleave_columns_impl>> { std::unique_ptr operator()(table_view const& structs_columns, bool create_mask, rmm::cuda_stream_view stream, @@ -131,7 +131,7 @@ struct interleave_columns_impl -struct interleave_columns_impl>> { +struct interleave_columns_impl>> { std::unique_ptr operator()(table_view const& strings_columns, bool create_mask, rmm::cuda_stream_view stream, @@ -214,7 +214,7 @@ struct interleave_columns_impl -struct interleave_columns_impl()>> { +struct interleave_columns_impl()>> { std::unique_ptr operator()(table_view const& input, bool create_mask, rmm::cuda_stream_view stream, diff --git a/cpp/src/rolling/rolling_detail.cuh b/cpp/src/rolling/rolling_detail.cuh index 7c52856b147..958da04e57c 100644 --- a/cpp/src/rolling/rolling_detail.cuh +++ b/cpp/src/rolling/rolling_detail.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -91,14 +91,14 @@ struct DeviceRolling { // operations we do support template - DeviceRolling(size_type _min_periods, typename std::enable_if_t()>* = nullptr) + DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) : min_periods(_min_periods) { } // operations we don't support template - DeviceRolling(size_type _min_periods, typename std::enable_if_t()>* = nullptr) + DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) : min_periods(_min_periods) { CUDF_FAIL("Invalid aggregation/type pair"); @@ -441,12 +441,12 @@ struct DeviceRollingLead { return cudf::is_fixed_width(); } - template ()>* = nullptr> + template ()>* = nullptr> DeviceRollingLead(size_type _row_offset) : row_offset(_row_offset) { } - template ()>* = nullptr> + template ()>* = nullptr> DeviceRollingLead(size_type _row_offset) : row_offset(_row_offset) { CUDF_FAIL("Invalid aggregation/type pair"); @@ -497,12 +497,12 @@ struct DeviceRollingLag { return cudf::is_fixed_width(); } - template ()>* = nullptr> + template ()>* = nullptr> DeviceRollingLag(size_type _row_offset) : row_offset(_row_offset) { } - template ()>* = nullptr> + template ()>* = nullptr> DeviceRollingLag(size_type _row_offset) : row_offset(_row_offset) { CUDF_FAIL("Invalid aggregation/type pair"); diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 81bf03f7c0a..9a2b1002997 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -49,26 +49,26 @@ inline double __device__ generic_round_half_even(double d) { return rint(d); } inline float __device__ generic_modf(float a, float* b) { return modff(a, b); } inline double __device__ generic_modf(double a, double* b) { return modf(a, b); } -template >* = nullptr> +template >* = nullptr> T __device__ generic_abs(T value) { return numeric::detail::abs(value); } -template >* = nullptr> +template >* = nullptr> T __device__ generic_abs(T value) { return value; } -template >* = nullptr> +template >* = nullptr> int16_t __device__ generic_sign(T value) { return value < 0 ? -1 : 1; } // this is needed to suppress warning: pointless comparison of unsigned integer with zero -template >* = nullptr> +template >* = nullptr> int16_t __device__ generic_sign(T) { return 1; @@ -83,13 +83,13 @@ constexpr inline auto is_supported_round_type() template struct half_up_zero { T n; // unused in the decimal_places = 0 case - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { return generic_round(e); } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -100,7 +100,7 @@ struct half_up_zero { template struct half_up_positive { T n; - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { T integer_part; @@ -108,7 +108,7 @@ struct half_up_positive { return integer_part + generic_round(fractional_part * n) / n; } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. 
Just for compilation @@ -119,13 +119,13 @@ struct half_up_positive { template struct half_up_negative { T n; - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { return generic_round(e / n) * n; } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U e) { auto const down = (e / n) * n; // result from rounding down @@ -136,13 +136,13 @@ struct half_up_negative { template struct half_even_zero { T n; // unused in the decimal_places = 0 case - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { return generic_round_half_even(e); } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -153,7 +153,7 @@ struct half_even_zero { template struct half_even_positive { T n; - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { T integer_part; @@ -161,7 +161,7 @@ struct half_even_positive { return integer_part + generic_round_half_even(fractional_part * n) / n; } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -172,13 +172,13 @@ struct half_even_positive { template struct half_even_negative { T n; - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { return generic_round_half_even(e / n) * n; } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U e) { auto const down_over_n = e / n; // use this to determine HALF_EVEN case @@ -205,7 +205,7 @@ struct half_even_fixed_point { template typename RoundFunctor, - typename std::enable_if_t()>* = nullptr> + std::enable_if_t()>* = nullptr> std::unique_ptr round_with(column_view const& input, int32_t decimal_places, rmm::cuda_stream_view stream, @@ -231,7 +231,7 @@ std::unique_ptr round_with(column_view const& input, template typename RoundFunctor, - typename std::enable_if_t()>* = nullptr> + std::enable_if_t()>* = nullptr> std::unique_ptr round_with(column_view const& input, int32_t decimal_places, rmm::cuda_stream_view stream, diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index c18b57d220f..3a2920f8f1a 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,8 @@ namespace cudf { namespace { struct scalar_construction_helper { template , - typename std::enable_if_t() and not is_fixed_point()>* = nullptr> + typename ScalarType = scalar_type_t, + std::enable_if_t() and not is_fixed_point()>* = nullptr> std::unique_ptr operator()(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { @@ -39,8 +39,8 @@ struct scalar_construction_helper { } template , - typename std::enable_if_t()>* = nullptr> + typename ScalarType = scalar_type_t, + std::enable_if_t()>* = nullptr> std::unique_ptr operator()(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { @@ -49,9 +49,7 @@ struct scalar_construction_helper { return std::unique_ptr(s); } - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(Args... 
args) const { CUDF_FAIL("Invalid type."); @@ -124,14 +122,14 @@ namespace { struct default_scalar_functor { data_type type; - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return make_fixed_width_scalar(data_type(type_to_id()), stream, mr); } - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/sort/sort.cu b/cpp/src/sort/sort.cu index 42b57bdb47a..5ce82cd3740 100644 --- a/cpp/src/sort/sort.cu +++ b/cpp/src/sort/sort.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,7 +57,7 @@ std::unique_ptr sort_by_key(table_view const& values, } struct inplace_column_sort_fn { - template ()>* = nullptr> + template ()>* = nullptr> void operator()(mutable_column_view& col, bool ascending, rmm::cuda_stream_view stream) const { CUDF_EXPECTS(!col.has_nulls(), "Nulls not supported for in-place sort"); @@ -68,7 +68,7 @@ struct inplace_column_sort_fn { } } - template ()>* = nullptr> + template ()>* = nullptr> void operator()(mutable_column_view&, bool, rmm::cuda_stream_view) const { CUDF_FAIL("Column type must be relationally comparable and fixed-width"); diff --git a/cpp/src/sort/sort_column.cu b/cpp/src/sort/sort_column.cu index 74c796e7962..7a4072cf8ae 100644 --- a/cpp/src/sort/sort_column.cu +++ b/cpp/src/sort/sort_column.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ struct column_sorted_order_fn { * @param ascending True if sort order is ascending * @param stream CUDA stream used for device memory operations and kernel launches */ - template ()>* = nullptr> + template ()>* = nullptr> void radix_sort(column_view const& input, mutable_column_view& indices, bool ascending, @@ -68,7 +68,7 @@ struct column_sorted_order_fn { thrust::greater()); } } - template ()>* = nullptr> + template ()>* = nullptr> void radix_sort(column_view const&, mutable_column_view&, bool, rmm::cuda_stream_view) { CUDF_FAIL("Only fixed-width types are suitable for faster sorting"); @@ -85,8 +85,7 @@ struct column_sorted_order_fn { * @param null_precedence How null rows are to be ordered * @param stream CUDA stream used for device memory operations and kernel launches */ - template ()>* = nullptr> + template ()>* = nullptr> void operator()(column_view const& input, mutable_column_view& indices, bool ascending, @@ -105,8 +104,7 @@ struct column_sorted_order_fn { } } - template ()>* = nullptr> + template ()>* = nullptr> void operator()(column_view const&, mutable_column_view&, bool, null_order, rmm::cuda_stream_view) { CUDF_FAIL("Column type must be relationally comparable"); diff --git a/cpp/src/sort/stable_sort_column.cu b/cpp/src/sort/stable_sort_column.cu index 49aecf52625..d79a691a580 100644 --- a/cpp/src/sort/stable_sort_column.cu +++ b/cpp/src/sort/stable_sort_column.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ struct column_stable_sorted_order_fn { * @param indices Output sorted indices * @param stream CUDA stream used for device memory operations and kernel launches */ - template ()>* = nullptr> + template ()>* = nullptr> void faster_stable_sort(column_view const& input, mutable_column_view& indices, rmm::cuda_stream_view stream) @@ -38,7 +38,7 @@ struct column_stable_sorted_order_fn { thrust::stable_sort_by_key( rmm::exec_policy(stream), d_col.begin(), d_col.end(), indices.begin()); } - template ()>* = nullptr> + template ()>* = nullptr> void faster_stable_sort(column_view const&, mutable_column_view&, rmm::cuda_stream_view) { CUDF_FAIL("Only fixed-width types are suitable for faster stable sorting"); @@ -55,8 +55,7 @@ struct column_stable_sorted_order_fn { * @param null_precedence How null rows are to be ordered * @param stream CUDA stream used for device memory operations and kernel launches */ - template ()>* = nullptr> + template ()>* = nullptr> void operator()(column_view const& input, mutable_column_view& indices, bool ascending, @@ -74,8 +73,7 @@ struct column_stable_sorted_order_fn { faster_stable_sort(input, indices, stream); } } - template ()>* = nullptr> + template ()>* = nullptr> void operator()(column_view const&, mutable_column_view&, bool, null_order, rmm::cuda_stream_view) { CUDF_FAIL("Column type must be relationally comparable"); diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu index efdee65c1f6..23bc5cf2dfe 100644 --- a/cpp/src/strings/contains.cu +++ b/cpp/src/strings/contains.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,10 @@ * limitations under the License. */ +#include +#include +#include + #include #include #include @@ -23,123 +27,90 @@ #include #include #include -#include -#include #include #include +#include + namespace cudf { namespace strings { namespace detail { + namespace { /** * @brief This functor handles both contains_re and match_re to minimize the number * of regex calls to find() to be inlined greatly reducing compile time. - * - * The stack is used to keep progress on evaluating the regex instructions on each string. - * So the size of the stack is in proportion to the number of instructions in the given regex - * pattern. - * - * There are three call types based on the number of regex instructions in the given pattern. - * Small to medium instruction lengths can use the stack effectively though smaller executes faster. - * Longer patterns require global memory. */ template struct contains_fn { reprog_device prog; - column_device_view d_strings; - bool bmatch{false}; // do not make this a template parameter to keep compile times down + column_device_view const d_strings; + bool const beginning_only; // do not make this a template parameter to keep compile times down __device__ bool operator()(size_type idx) { if (d_strings.is_null(idx)) return false; - string_view d_str = d_strings.element(idx); - int32_t begin = 0; - int32_t end = bmatch ? 1 // match only the beginning of the string; - : -1; // this handles empty strings too + auto const d_str = d_strings.element(idx); + int32_t begin = 0; + int32_t end = beginning_only ? 
1 // match only the beginning of the string; + : -1; // match anywhere in the string return static_cast(prog.find(idx, d_str, begin, end)); } }; -// -std::unique_ptr contains_util( - strings_column_view const& strings, - std::string const& pattern, - regex_flags const flags, - bool beginning_only = false, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - auto strings_count = strings.size(); - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - - // compile regex into device object - auto prog = - reprog_device::create(pattern, flags, get_character_flags_table(), strings_count, stream); - auto d_prog = *prog; - - // create the output column - auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings_count, - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), - stream, - mr); - auto d_results = results->mutable_view().data(); +struct contains_dispatch_fn { + reprog_device d_prog; + bool const beginning_only; - // fill the output column - int regex_insts = d_prog.insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - contains_fn{d_prog, d_column, beginning_only}); - else if (regex_insts <= RX_MEDIUM_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - contains_fn{d_prog, d_column, beginning_only}); - else if (regex_insts <= RX_LARGE_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - contains_fn{d_prog, d_column, beginning_only}); - else + template + std::unique_ptr operator()(strings_column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto results = make_numeric_column(data_type{type_id::BOOL8}, + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); + + auto const d_strings = column_device_view::create(input.parent(), stream); thrust::transform(rmm::exec_policy(stream), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - contains_fn{d_prog, d_column, beginning_only}); - - results->set_null_count(strings.null_count()); - return results; -} + thrust::make_counting_iterator(input.size()), + results->mutable_view().data(), + contains_fn{d_prog, *d_strings, beginning_only}); + return results; + } +}; } // namespace std::unique_ptr contains_re( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - return contains_util(strings, pattern, flags, false, stream, mr); + auto d_prog = + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); + + return regex_dispatcher(*d_prog, contains_dispatch_fn{*d_prog, false}, input, stream, mr); } std::unique_ptr matches_re( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - 
return contains_util(strings, pattern, flags, true, stream, mr); + auto d_prog = + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); + + return regex_dispatcher(*d_prog, contains_dispatch_fn{*d_prog, true}, input, stream, mr); } } // namespace detail @@ -172,12 +143,12 @@ namespace { template struct count_fn { reprog_device prog; - column_device_view d_strings; + column_device_view const d_strings; __device__ int32_t operator()(unsigned int idx) { if (d_strings.is_null(idx)) return 0; - string_view d_str = d_strings.element(idx); + auto const d_str = d_strings.element(idx); auto const nchars = d_str.length(); int32_t find_count = 0; int32_t begin = 0; @@ -191,62 +162,45 @@ struct count_fn { } }; +struct count_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(strings_column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto results = make_numeric_column(data_type{type_id::INT32}, + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); + + auto const d_strings = column_device_view::create(input.parent(), stream); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.size()), + results->mutable_view().data(), + count_fn{d_prog, *d_strings}); + return results; + } +}; + } // namespace std::unique_ptr count_re( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto strings_count = strings.size(); - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - // compile regex into device object - auto prog = - reprog_device::create(pattern, flags, get_character_flags_table(), strings_count, stream); - auto d_prog = *prog; - - // create the output column - auto results = make_numeric_column(data_type{type_id::INT32}, - strings_count, - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), - stream, - mr); - auto d_results = results->mutable_view().data(); - - // fill the output column - int regex_insts = d_prog.insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - count_fn{d_prog, d_column}); - else if (regex_insts <= RX_MEDIUM_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - count_fn{d_prog, d_column}); - else if (regex_insts <= RX_LARGE_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - count_fn{d_prog, d_column}); - else - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - count_fn{d_prog, d_column}); + auto d_prog = + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); - results->set_null_count(strings.null_count()); - return results; + return regex_dispatcher(*d_prog, count_dispatch_fn{*d_prog}, input, stream, mr); } } // namespace detail diff --git a/cpp/src/strings/count_matches.cu 
b/cpp/src/strings/count_matches.cu index d0a6825666b..ae996cafd2c 100644 --- a/cpp/src/strings/count_matches.cu +++ b/cpp/src/strings/count_matches.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -54,6 +55,27 @@ struct count_matches_fn { return count; } }; + +struct count_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(column_device_view const& d_strings, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto results = make_numeric_column( + data_type{type_id::INT32}, d_strings.size() + 1, mask_state::UNALLOCATED, stream, mr); + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(d_strings.size()), + results->mutable_view().data(), + count_matches_fn{d_strings, d_prog}); + return results; + } +}; + } // namespace /** @@ -71,31 +93,7 @@ std::unique_ptr count_matches(column_device_view const& d_strings, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // Create output column - auto counts = make_numeric_column( - data_type{type_id::INT32}, d_strings.size() + 1, mask_state::UNALLOCATED, stream, mr); - auto d_counts = counts->mutable_view().data(); - - auto begin = thrust::make_counting_iterator(0); - auto end = thrust::make_counting_iterator(d_strings.size()); - - // Count matches - auto const regex_insts = d_prog.insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) { - count_matches_fn fn{d_strings, d_prog}; - thrust::transform(rmm::exec_policy(stream), begin, end, d_counts, fn); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - count_matches_fn fn{d_strings, d_prog}; - thrust::transform(rmm::exec_policy(stream), begin, end, d_counts, fn); - } else if (regex_insts <= RX_LARGE_INSTS) { - count_matches_fn fn{d_strings, d_prog}; - thrust::transform(rmm::exec_policy(stream), begin, end, d_counts, fn); - } else { - count_matches_fn fn{d_strings, d_prog}; - thrust::transform(rmm::exec_policy(stream), begin, end, d_counts, fn); - } - - return counts; + return regex_dispatcher(d_prog, count_dispatch_fn{d_prog}, d_strings, stream, mr); } } // namespace detail diff --git a/cpp/src/strings/extract/extract.cu b/cpp/src/strings/extract/extract.cu index a67af9442f0..7394cdac6bb 100644 --- a/cpp/src/strings/extract/extract.cu +++ b/cpp/src/strings/extract/extract.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include @@ -77,53 +78,44 @@ struct extract_fn { thrust::fill(thrust::seq, d_output.begin(), d_output.end(), string_index_pair{nullptr, 0}); } }; + +struct extract_dispatch_fn { + reprog_device d_prog; + + template + void operator()(column_device_view const& d_strings, + cudf::detail::device_2dspan& d_indices, + rmm::cuda_stream_view stream) + { + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + d_strings.size(), + extract_fn{d_prog, d_strings, d_indices}); + } +}; } // namespace // std::unique_ptr
extract( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto const strings_count = strings.size(); - auto const strings_column = column_device_view::create(strings.parent(), stream); - auto const d_strings = *strings_column; - // compile regex into device object - auto prog = - reprog_device::create(pattern, flags, get_character_flags_table(), strings_count, stream); - auto d_prog = *prog; - // extract should include groups - auto const groups = d_prog.group_counts(); + auto d_prog = + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); + + auto const groups = d_prog->group_counts(); CUDF_EXPECTS(groups > 0, "Group indicators not found in regex pattern"); - rmm::device_uvector indices(strings_count * groups, stream); - cudf::detail::device_2dspan d_indices(indices.data(), strings_count, groups); + auto indices = rmm::device_uvector(input.size() * groups, stream); + auto d_indices = + cudf::detail::device_2dspan(indices.data(), input.size(), groups); - auto const regex_insts = d_prog.insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) { - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - extract_fn{d_prog, d_strings, d_indices}); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - extract_fn{d_prog, d_strings, d_indices}); - } else if (regex_insts <= RX_LARGE_INSTS) { - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - extract_fn{d_prog, d_strings, d_indices}); - } else { - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - extract_fn{d_prog, d_strings, d_indices}); - } + auto const d_strings = column_device_view::create(input.parent(), stream); + regex_dispatcher(*d_prog, extract_dispatch_fn{*d_prog}, *d_strings, d_indices, stream); // build a result column for each group std::vector> results(groups); @@ -135,7 +127,7 @@ std::unique_ptr
extract( 0, [column_index, groups] __device__(size_type idx) { return (idx * groups) + column_index; })); - return make_strings_column(indices_itr, indices_itr + strings_count, stream, mr); + return make_strings_column(indices_itr, indices_itr + input.size(), stream, mr); }; std::transform(thrust::make_counting_iterator(0), diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu index e27dccb9338..1f1474c777b 100644 --- a/cpp/src/strings/extract/extract_all.cu +++ b/cpp/src/strings/extract/extract_all.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -86,6 +87,28 @@ struct extract_fn { } } }; + +struct extract_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(column_device_view const& d_strings, + size_type total_groups, + offset_type const* d_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + rmm::device_uvector indices(total_groups, stream); + + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + d_strings.size(), + extract_fn{d_strings, d_prog, d_offsets, indices.data()}); + + return make_strings_column(indices.begin(), indices.end(), stream, mr); + } +}; + } // namespace /** @@ -94,14 +117,14 @@ struct extract_fn { * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_all_record( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto const strings_count = strings.size(); - auto const d_strings = column_device_view::create(strings.parent(), stream); + auto const strings_count = input.size(); + auto const d_strings = column_device_view::create(input.parent(), stream); // Compile regex into device object. auto d_prog = @@ -143,29 +166,8 @@ std::unique_ptr extract_all_record( auto const total_groups = cudf::detail::get_value(offsets->view(), strings_count, stream); - // Create an indices vector with the total number of groups that will be extracted. - rmm::device_uvector indices(total_groups, stream); - auto d_indices = indices.data(); - auto begin = thrust::make_counting_iterator(0); - - // Call the extract functor to fill in the indices vector. - auto const regex_insts = d_prog->insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) { - extract_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - extract_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else if (regex_insts <= RX_LARGE_INSTS) { - extract_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else { - extract_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } - - // Build the child strings column from the indices. - auto strings_output = make_strings_column(indices.begin(), indices.end(), stream, mr); + auto strings_output = regex_dispatcher( + *d_prog, extract_dispatch_fn{*d_prog}, *d_strings, total_groups, d_offsets, stream, mr); // Build the lists column from the offsets and the strings. 
return make_lists_column(strings_count, diff --git a/cpp/src/strings/regex/dispatcher.hpp b/cpp/src/strings/regex/dispatcher.hpp new file mode 100644 index 00000000000..9ff51d1c979 --- /dev/null +++ b/cpp/src/strings/regex/dispatcher.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf { +namespace strings { +namespace detail { + +/** + * The stack is used to keep progress (state) on evaluating the regex instructions on each string. + * So the size of the stack is in proportion to the number of instructions in the given regex + * pattern. + * + * There are four call types based on the number of regex instructions in the given pattern. + * Small, medium, and large instruction counts can use the stack effectively. + * Smaller stack sizes execute faster. + * + * Patterns with instruction counts bigger than large use global memory rather than the stack + * for managing the evaluation state data. + * + * @tparam Functor The functor to invoke with stack size templated value. + * @tparam Ts Parameter types for the functor call. + */ +template +constexpr decltype(auto) regex_dispatcher(reprog_device d_prog, Functor f, Ts&&... args) +{ + auto const num_regex_insts = d_prog.insts_counts(); + if (num_regex_insts <= RX_SMALL_INSTS) { + return f.template operator()(std::forward(args)...); + } + if (num_regex_insts <= RX_MEDIUM_INSTS) { + return f.template operator()(std::forward(args)...); + } + if (num_regex_insts <= RX_LARGE_INSTS) { + return f.template operator()(std::forward(args)...); + } + + return f.template operator()(std::forward(args)...); +} + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu index ff86d7aa552..27e0bd4fac9 100644 --- a/cpp/src/strings/replace/backref_re.cu +++ b/cpp/src/strings/replace/backref_re.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
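// [editor's note — illustrative sketch, not part of this patch] Self-contained mock of the
// dispatch pattern the new dispatcher.hpp centralizes: the caller packages its work in a
// functor whose operator() is templated on a stack-size constant, and the dispatcher picks
// the instantiation from the compiled pattern's instruction count, exactly as the new
// contains_dispatch_fn / count_dispatch_fn / extract_dispatch_fn do in this patch. The
// thresholds, stack sizes, and mock_prog type below are arbitrary stand-ins, not the real
// cudf RX_* constants or reprog_device.
#include <iostream>
#include <utility>

constexpr int small_insts  = 16;  // stand-ins for RX_SMALL_INSTS / RX_MEDIUM_INSTS / RX_LARGE_INSTS
constexpr int medium_insts = 64;
constexpr int large_insts  = 256;

constexpr int stack_small  = 128;  // stand-ins for the RX_STACK_* sizes
constexpr int stack_medium = 1024;
constexpr int stack_large  = 8192;
constexpr int stack_any    = 16384;

struct mock_prog {
  int num_insts;
  int insts_counts() const { return num_insts; }
};

template <typename Functor, typename... Ts>
constexpr decltype(auto) regex_dispatcher_mock(mock_prog d_prog, Functor f, Ts&&... args)
{
  auto const n = d_prog.insts_counts();
  if (n <= small_insts) { return f.template operator()<stack_small>(std::forward<Ts>(args)...); }
  if (n <= medium_insts) { return f.template operator()<stack_medium>(std::forward<Ts>(args)...); }
  if (n <= large_insts) { return f.template operator()<stack_large>(std::forward<Ts>(args)...); }
  return f.template operator()<stack_any>(std::forward<Ts>(args)...);
}

// A caller's dispatch functor: a real one would launch a kernel or thrust::transform with a
// device functor templated on stack_size; this one only reports which tier was selected.
struct print_dispatch_fn {
  template <int stack_size>
  void operator()(char const* label)
  {
    std::cout << label << ": instantiated with stack_size=" << stack_size << '\n';
  }
};

int main()
{
  regex_dispatcher_mock(mock_prog{10}, print_dispatch_fn{}, "short pattern");
  regex_dispatcher_mock(mock_prog{500}, print_dispatch_fn{}, "long pattern");
}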
@@ -16,6 +16,7 @@ #include "backref_re.cuh" +#include #include #include @@ -95,27 +96,54 @@ std::pair> parse_backrefs(std::string con return {rtn, backrefs}; } +template +struct replace_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(strings_column_view const& input, + string_view const& d_repl_template, + Iterator backrefs_begin, + Iterator backrefs_end, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto const d_strings = column_device_view::create(input.parent(), stream); + + auto children = make_strings_children( + backrefs_fn{ + *d_strings, d_prog, d_repl_template, backrefs_begin, backrefs_end}, + input.size(), + stream, + mr); + + return make_strings_column(input.size(), + std::move(children.first), + std::move(children.second), + input.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr)); + } +}; + } // namespace // std::unique_ptr replace_with_backrefs( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, std::string const& replacement, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - if (strings.is_empty()) return make_empty_column(type_id::STRING); + if (input.is_empty()) return make_empty_column(type_id::STRING); CUDF_EXPECTS(!pattern.empty(), "Parameter pattern must not be empty"); CUDF_EXPECTS(!replacement.empty(), "Parameter replacement must not be empty"); - auto d_strings = column_device_view::create(strings.parent(), stream); // compile regex into device object auto d_prog = - reprog_device::create(pattern, flags, get_character_flags_table(), strings.size(), stream); - auto const regex_insts = d_prog->insts_counts(); + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); // parse the repl string for back-ref indicators auto const parse_result = parse_backrefs(replacement); @@ -125,45 +153,14 @@ std::unique_ptr replace_with_backrefs( string_view const d_repl_template = repl_scalar.value(); using BackRefIterator = decltype(backrefs.begin()); - - // create child columns - auto [offsets, chars] = [&] { - if (regex_insts <= RX_SMALL_INSTS) { - return make_strings_children( - backrefs_fn{ - *d_strings, *d_prog, d_repl_template, backrefs.begin(), backrefs.end()}, - strings.size(), - stream, - mr); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - return make_strings_children( - backrefs_fn{ - *d_strings, *d_prog, d_repl_template, backrefs.begin(), backrefs.end()}, - strings.size(), - stream, - mr); - } else if (regex_insts <= RX_LARGE_INSTS) { - return make_strings_children( - backrefs_fn{ - *d_strings, *d_prog, d_repl_template, backrefs.begin(), backrefs.end()}, - strings.size(), - stream, - mr); - } else { - return make_strings_children( - backrefs_fn{ - *d_strings, *d_prog, d_repl_template, backrefs.begin(), backrefs.end()}, - strings.size(), - stream, - mr); - } - }(); - - return make_strings_column(strings.size(), - std::move(offsets), - std::move(chars), - strings.null_count(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr)); + return regex_dispatcher(*d_prog, + replace_dispatch_fn{*d_prog}, + input, + d_repl_template, + backrefs.begin(), + backrefs.end(), + stream, + mr); } } // namespace detail diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index 2b5380b76dd..22f6d2cba39 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -1,5 
+1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include @@ -30,6 +31,8 @@ #include +#include + namespace cudf { namespace strings { namespace detail { @@ -40,16 +43,6 @@ using found_range = thrust::pair; /** * @brief This functor handles replacing strings by applying the compiled regex patterns * and inserting the corresponding new string within the matched range of characters. - * - * The logic includes computing the size of each string and also writing the output. - * - * The stack is used to keep progress on evaluating the regex instructions on each string. - * So the size of the stack is in proportion to the number of instructions in the given regex - * pattern. - * - * There are three call types based on the number of regex instructions in the given pattern. - * Small to medium instruction lengths can use the stack effectively though smaller executes faster. - * Longer patterns require global memory. Shorter patterns are common in data cleaning. */ template struct replace_multi_regex_fn { @@ -127,69 +120,76 @@ struct replace_multi_regex_fn { } }; +struct replace_dispatch_fn { + template + std::unique_ptr operator()(strings_column_view const& input, + device_span d_progs, + strings_column_view const& replacements, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto const d_strings = column_device_view::create(input.parent(), stream); + auto const d_repls = column_device_view::create(replacements.parent(), stream); + + auto found_ranges = rmm::device_uvector(d_progs.size() * input.size(), stream); + + auto children = make_strings_children( + replace_multi_regex_fn{*d_strings, d_progs, found_ranges.data(), *d_repls}, + input.size(), + stream, + mr); + + return make_strings_column(input.size(), + std::move(children.first), + std::move(children.second), + input.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr)); + } +}; + } // namespace std::unique_ptr replace_re( - strings_column_view const& strings, + strings_column_view const& input, std::vector const& patterns, strings_column_view const& replacements, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto strings_count = strings.size(); - if (strings_count == 0) return make_empty_column(type_id::STRING); - if (patterns.empty()) // no patterns; just return a copy - return std::make_unique(strings.parent(), stream, mr); + if (input.is_empty()) { return make_empty_column(type_id::STRING); } + if (patterns.empty()) { // if no patterns; just return a copy + return std::make_unique(input.parent(), stream, mr); + } CUDF_EXPECTS(!replacements.has_nulls(), "Parameter replacements must not have any nulls"); - auto d_strings = column_device_view::create(strings.parent(), stream); - auto d_repls = column_device_view::create(replacements.parent(), stream); - auto d_char_table = get_character_flags_table(); - // compile regexes into device objects - size_type regex_insts = 0; - std::vector>> h_progs; - std::vector progs; - for (auto itr = patterns.begin(); itr != patterns.end(); ++itr) { - auto prog = reprog_device::create(*itr, flags, d_char_table, strings_count, stream); - regex_insts = std::max(regex_insts, prog->insts_counts()); - 
progs.push_back(*prog); - h_progs.emplace_back(std::move(prog)); - } + auto const d_char_table = get_character_flags_table(); + auto h_progs = std::vector>>( + patterns.size()); + std::transform(patterns.begin(), + patterns.end(), + h_progs.begin(), + [flags, d_char_table, input, stream](auto const& ptn) { + return reprog_device::create(ptn, flags, d_char_table, input.size(), stream); + }); + + // get the longest regex for the dispatcher + auto const max_prog = + std::max_element(h_progs.begin(), h_progs.end(), [](auto const& lhs, auto const& rhs) { + return lhs->insts_counts() < rhs->insts_counts(); + }); // copy all the reprog_device instances to a device memory array + std::vector progs; + std::transform(h_progs.begin(), h_progs.end(), std::back_inserter(progs), [](auto const& d_prog) { + return *d_prog; + }); auto d_progs = cudf::detail::make_device_uvector_async(progs, stream); - // create working buffer for ranges pairs - rmm::device_uvector found_ranges(patterns.size() * strings_count, stream); - auto d_found_ranges = found_ranges.data(); - - // create child columns - auto children = [&] { - // Each invocation is predicated on the stack size which is dependent on the number of regex - // instructions - if (regex_insts <= RX_SMALL_INSTS) { - replace_multi_regex_fn fn{*d_strings, d_progs, d_found_ranges, *d_repls}; - return make_strings_children(fn, strings_count, stream, mr); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - replace_multi_regex_fn fn{*d_strings, d_progs, d_found_ranges, *d_repls}; - return make_strings_children(fn, strings_count, stream, mr); - } else if (regex_insts <= RX_LARGE_INSTS) { - replace_multi_regex_fn fn{*d_strings, d_progs, d_found_ranges, *d_repls}; - return make_strings_children(fn, strings_count, stream, mr); - } else { - replace_multi_regex_fn fn{*d_strings, d_progs, d_found_ranges, *d_repls}; - return make_strings_children(fn, strings_count, stream, mr); - } - }(); - - return make_strings_column(strings_count, - std::move(children.first), - std::move(children.second), - strings.null_count(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr)); + return regex_dispatcher( + **max_prog, replace_dispatch_fn{}, input, d_progs, replacements, stream, mr); } } // namespace detail diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu index 2c594bb86a8..d42359deeac 100644 --- a/cpp/src/strings/replace/replace_re.cu +++ b/cpp/src/strings/replace/replace_re.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include @@ -36,16 +37,6 @@ namespace { /** * @brief This functor handles replacing strings by applying the compiled regex pattern * and inserting the new string within the matched range of characters. - * - * The logic includes computing the size of each string and also writing the output. - * - * The stack is used to keep progress on evaluating the regex instructions on each string. - * So the size of the stack is in proportion to the number of instructions in the given regex - * pattern. - * - * There are three call types based on the number of regex instructions in the given pattern. - * Small to medium instruction lengths can use the stack effectively though smaller executes faster. - * Longer patterns require global memory. Shorter patterns are common in data cleaning. 
*/ template struct replace_regex_fn { @@ -108,11 +99,37 @@ struct replace_regex_fn { } }; +struct replace_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(strings_column_view const& input, + string_view const& d_replacement, + size_type max_replace_count, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto const d_strings = column_device_view::create(input.parent(), stream); + + auto children = make_strings_children( + replace_regex_fn{*d_strings, d_prog, d_replacement, max_replace_count}, + input.size(), + stream, + mr); + + return make_strings_column(input.size(), + std::move(children.first), + std::move(children.second), + input.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr)); + } +}; + } // namespace // std::unique_ptr replace_re( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, string_scalar const& replacement, std::optional max_replace_count, @@ -120,49 +137,19 @@ std::unique_ptr replace_re( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto strings_count = strings.size(); - if (strings_count == 0) return make_empty_column(type_id::STRING); + if (input.is_empty()) return make_empty_column(type_id::STRING); CUDF_EXPECTS(replacement.is_valid(stream), "Parameter replacement must be valid"); string_view d_repl(replacement.data(), replacement.size()); - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_strings = *strings_column; // compile regex into device object - auto prog = - reprog_device::create(pattern, flags, get_character_flags_table(), strings_count, stream); - auto d_prog = *prog; - auto const regex_insts = d_prog.insts_counts(); - - // copy null mask - auto null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr); - auto const null_count = strings.null_count(); - auto const maxrepl = max_replace_count.value_or(-1); - - // create child columns - auto children = [&] { - // Each invocation is predicated on the stack size which is dependent on the number of regex - // instructions - if (regex_insts <= RX_SMALL_INSTS) { - replace_regex_fn fn{d_strings, d_prog, d_repl, maxrepl}; - return make_strings_children(fn, strings_count, stream, mr); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - replace_regex_fn fn{d_strings, d_prog, d_repl, maxrepl}; - return make_strings_children(fn, strings_count, stream, mr); - } else if (regex_insts <= RX_LARGE_INSTS) { - replace_regex_fn fn{d_strings, d_prog, d_repl, maxrepl}; - return make_strings_children(fn, strings_count, stream, mr); - } else { - replace_regex_fn fn{d_strings, d_prog, d_repl, maxrepl}; - return make_strings_children(fn, strings_count, stream, mr); - } - }(); + auto d_prog = + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); + + auto const maxrepl = max_replace_count.value_or(-1); - return make_strings_column(strings_count, - std::move(children.first), - std::move(children.second), - null_count, - std::move(null_mask)); + return regex_dispatcher( + *d_prog, replace_dispatch_fn{*d_prog}, input, d_repl, maxrepl, stream, mr); } } // namespace detail diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu index 810e44cc27d..201556033ad 100644 --- a/cpp/src/strings/search/findall.cu +++ b/cpp/src/strings/search/findall.cu @@ -14,6 +14,11 @@ * limitations under the License. 
*/ +#include +#include +#include +#include + #include #include #include @@ -24,19 +29,16 @@ #include #include -#include -#include - #include #include -#include +#include +#include namespace cudf { namespace strings { namespace detail { using string_index_pair = thrust::pair; -using findall_result = thrust::pair; namespace { /** @@ -47,27 +49,20 @@ template struct findall_fn { column_device_view const d_strings; reprog_device prog; - size_type column_index; + size_type const column_index; size_type const* d_counts; - findall_fn(column_device_view const& d_strings, - reprog_device& prog, - size_type column_index = -1, - size_type const* d_counts = nullptr) - : d_strings(d_strings), prog(prog), column_index(column_index), d_counts(d_counts) + __device__ string_index_pair operator()(size_type idx) { - } + if (d_strings.is_null(idx) || (column_index >= d_counts[idx])) { + return string_index_pair{nullptr, 0}; + } + + auto const d_str = d_strings.element(idx); + auto const nchars = d_str.length(); + int32_t spos = 0; + auto epos = static_cast(nchars); - // this will count columns as well as locate a specific string for a column - __device__ findall_result findall(size_type idx) - { - string_index_pair result{nullptr, 0}; - if (d_strings.is_null(idx) || (d_counts && (column_index >= d_counts[idx]))) - return findall_result{0, result}; - string_view d_str = d_strings.element(idx); - auto const nchars = d_str.length(); - int32_t spos = 0; - auto epos = static_cast(nchars); size_type column_count = 0; while (spos <= nchars) { if (prog.find(idx, d_str, spos, epos) <= 0) break; // no more matches found @@ -76,36 +71,40 @@ struct findall_fn { epos = static_cast(nchars); ++column_count; } - if (spos <= epos) { - spos = d_str.byte_offset(spos); // convert - epos = d_str.byte_offset(epos); // to bytes - result = string_index_pair{d_str.data() + spos, (epos - spos)}; - } - // return the strings location and the column count - return findall_result{column_count, result}; - } - __device__ string_index_pair operator()(size_type idx) - { - // this one only cares about the string - return findall(idx).second; + auto const result = [&] { + if (spos > epos) { return string_index_pair{nullptr, 0}; } + // convert character positions to byte positions + spos = d_str.byte_offset(spos); + epos = d_str.byte_offset(epos); + return string_index_pair{d_str.data() + spos, (epos - spos)}; + }(); + + return result; } }; -template -struct findall_count_fn : public findall_fn { - findall_count_fn(column_device_view const& strings, reprog_device& prog) - : findall_fn{strings, prog} - { - } +struct findall_dispatch_fn { + reprog_device d_prog; - __device__ size_type operator()(size_type idx) + template + std::unique_ptr operator()(column_device_view const& d_strings, + size_type column_index, + size_type const* d_find_counts, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - // this one only cares about the column count - return findall_fn::findall(idx).first; + rmm::device_uvector indices(d_strings.size(), stream); + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(d_strings.size()), + indices.begin(), + findall_fn{d_strings, d_prog, column_index, d_find_counts}); + + return make_strings_column(indices.begin(), indices.end(), stream, mr); } }; - } // namespace // @@ -124,38 +123,15 @@ std::unique_ptr
findall( reprog_device::create(pattern, flags, get_character_flags_table(), strings_count, stream); auto const regex_insts = d_prog->insts_counts(); - rmm::device_uvector find_counts(strings_count, stream); - auto d_find_counts = find_counts.data(); - - if (regex_insts <= RX_SMALL_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_find_counts, - findall_count_fn{*d_strings, *d_prog}); - else if (regex_insts <= RX_MEDIUM_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_find_counts, - findall_count_fn{*d_strings, *d_prog}); - else if (regex_insts <= RX_LARGE_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_find_counts, - findall_count_fn{*d_strings, *d_prog}); - else - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_find_counts, - findall_count_fn{*d_strings, *d_prog}); + auto find_counts = + count_matches(*d_strings, *d_prog, stream, rmm::mr::get_current_device_resource()); + auto d_find_counts = find_counts->mutable_view().data(); std::vector> results; size_type const columns = thrust::reduce( - rmm::exec_policy(stream), find_counts.begin(), find_counts.end(), 0, thrust::maximum{}); + rmm::exec_policy(stream), d_find_counts, d_find_counts + strings_count, 0, thrust::maximum{}); + // boundary case: if no columns, return all nulls column (issue #119) if (columns == 0) results.emplace_back(std::make_unique( @@ -166,39 +142,10 @@ std::unique_ptr
findall( strings_count)); for (int32_t column_index = 0; column_index < columns; ++column_index) { - rmm::device_uvector indices(strings_count, stream); - - if (regex_insts <= RX_SMALL_INSTS) - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - indices.begin(), - findall_fn{*d_strings, *d_prog, column_index, d_find_counts}); - else if (regex_insts <= RX_MEDIUM_INSTS) - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - indices.begin(), - findall_fn{*d_strings, *d_prog, column_index, d_find_counts}); - else if (regex_insts <= RX_LARGE_INSTS) - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - indices.begin(), - findall_fn{*d_strings, *d_prog, column_index, d_find_counts}); - else - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - indices.begin(), - findall_fn{*d_strings, *d_prog, column_index, d_find_counts}); - - // - results.emplace_back(make_strings_column(indices.begin(), indices.end(), stream, mr)); + results.emplace_back(regex_dispatcher( + *d_prog, findall_dispatch_fn{*d_prog}, *d_strings, column_index, d_find_counts, stream, mr)); } + return std::make_unique
(std::move(results)); } diff --git a/cpp/src/strings/search/findall_record.cu b/cpp/src/strings/search/findall_record.cu index c93eb0c17db..95e347a7c35 100644 --- a/cpp/src/strings/search/findall_record.cu +++ b/cpp/src/strings/search/findall_record.cu @@ -15,6 +15,9 @@ */ #include +#include +#include +#include #include #include @@ -26,9 +29,6 @@ #include #include -#include -#include - #include #include @@ -75,6 +75,27 @@ struct findall_fn { } }; +struct findall_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(column_device_view const& d_strings, + size_type total_matches, + offset_type const* d_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + rmm::device_uvector indices(total_matches, stream); + + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + d_strings.size(), + findall_fn{d_strings, d_prog, d_offsets, indices.data()}); + + return make_strings_column(indices.begin(), indices.end(), stream, mr); + } +}; + } // namespace // @@ -121,30 +142,11 @@ std::unique_ptr findall_record( rmm::exec_policy(stream), d_offsets, d_offsets + strings_count + 1, d_offsets); // Create indices vector with the total number of groups that will be extracted - auto total_matches = cudf::detail::get_value(offsets->view(), strings_count, stream); - - rmm::device_uvector indices(total_matches, stream); - auto d_indices = indices.data(); - auto begin = thrust::make_counting_iterator(0); - - // Build the string indices - auto const regex_insts = d_prog->insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) { - findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else if (regex_insts <= RX_LARGE_INSTS) { - findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else { - findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } + auto const total_matches = + cudf::detail::get_value(offsets->view(), strings_count, stream); - // Build the child strings column from the resulting indices - auto strings_output = make_strings_column(indices.begin(), indices.end(), stream, mr); + auto strings_output = regex_dispatcher( + *d_prog, findall_dispatch_fn{*d_prog}, *d_strings, total_matches, d_offsets, stream, mr); // Build the lists column from the offsets and the strings return make_lists_column(strings_count, diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu index d80148f2fe6..a8a2467dd76 100644 --- a/cpp/src/strings/split/split_re.cu +++ b/cpp/src/strings/split/split_re.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -110,6 +111,28 @@ struct token_reader_fn { } }; +struct generate_dispatch_fn { + reprog_device d_prog; + + template + rmm::device_uvector operator()(column_device_view const& d_strings, + size_type total_tokens, + split_direction direction, + offset_type const* d_offsets, + rmm::cuda_stream_view stream) + { + rmm::device_uvector tokens(total_tokens, stream); + + thrust::for_each_n( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + d_strings.size(), + token_reader_fn{d_strings, d_prog, direction, d_offsets, tokens.data()}); + + return 
tokens; + } +}; + /** * @brief Call regex to split each input string into tokens. * @@ -148,24 +171,8 @@ rmm::device_uvector generate_tokens(column_device_view const& // the last offset entry is the total number of tokens to be generated auto const total_tokens = cudf::detail::get_value(offsets, strings_count, stream); - // generate tokens for each string - rmm::device_uvector tokens(total_tokens, stream); - auto const regex_insts = d_prog.insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) { - token_reader_fn reader{d_strings, d_prog, direction, d_offsets, tokens.data()}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, reader); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - token_reader_fn reader{d_strings, d_prog, direction, d_offsets, tokens.data()}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, reader); - } else if (regex_insts <= RX_LARGE_INSTS) { - token_reader_fn reader{d_strings, d_prog, direction, d_offsets, tokens.data()}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, reader); - } else { - token_reader_fn reader{d_strings, d_prog, direction, d_offsets, tokens.data()}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, reader); - } - - return tokens; + return regex_dispatcher( + d_prog, generate_dispatch_fn{d_prog}, d_strings, total_tokens, direction, d_offsets, stream); } /** diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index 5cc4ce5f6c9..f77ab7aa3d9 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
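Because regex_dispatcher is declared with a decltype(auto) return type, it can forward whatever the dispatch functor produces: a strings column in the replace and findall paths, a token vector in generate_tokens above. A small self-contained illustration of that return-type deduction, with toy result types standing in for the cuDF ones:

// decltype(auto) forwards the functor's exact return type, so one dispatcher
// can serve functors that build different result types. Types here are toys.
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

template <typename Functor, typename... Ts>
decltype(auto) dispatch(Functor f, Ts&&... args)
{
  // a fixed template argument keeps the sketch short; the real dispatcher picks it at runtime
  return f.template operator()<64>(std::forward<Ts>(args)...);
}

struct make_string_fn {
  template <int stack_size>
  std::unique_ptr<std::string> operator()(char c) const
  {
    return std::make_unique<std::string>(std::size_t{stack_size}, c);
  }
};

struct make_vector_fn {
  template <int stack_size>
  std::vector<int> operator()(int value) const
  {
    return std::vector<int>(stack_size, value);
  }
};

int main()
{
  auto s = dispatch(make_string_fn{}, 'x');  // deduced as std::unique_ptr<std::string>
  auto v = dispatch(make_vector_fn{}, 7);    // deduced as std::vector<int>
  std::cout << s->size() << " " << v.size() << "\n";  // prints "64 64"
  return 0;
}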
@@ -34,19 +34,19 @@ namespace detail { namespace { // anonymous namespace template struct unary_cast { - template () && - cudf::is_numeric())>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_numeric() && cudf::is_numeric())>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return static_cast(element); } - template () && - cudf::is_timestamp())>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_timestamp() && cudf::is_timestamp())>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { // Convert source tick counts into target tick counts without blindly truncating them @@ -55,46 +55,46 @@ struct unary_cast { return TargetT{cuda::std::chrono::floor(element.time_since_epoch())}; } - template () && - cudf::is_duration())>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_duration() && cudf::is_duration())>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return TargetT{cuda::std::chrono::floor(element)}; } - template () && - cudf::is_duration()>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t() && cudf::is_duration()>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return TargetT{static_cast(element)}; } - template () && - cudf::is_duration())>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_timestamp() && cudf::is_duration())>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return TargetT{cuda::std::chrono::floor(element.time_since_epoch())}; } - template () && - cudf::is_numeric()>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t() && cudf::is_numeric()>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return static_cast(element.count()); } - template () && - cudf::is_timestamp())>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_duration() && cudf::is_timestamp())>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return TargetT{cuda::std::chrono::floor(element)}; @@ -107,20 +107,20 @@ struct fixed_point_unary_cast { using FixedPointT = std::conditional_t(), _SourceT, _TargetT>; using DeviceT = device_storage_type_t; - template () && - cudf::is_numeric())>* = nullptr> + template < + typename SourceT = _SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_fixed_point<_SourceT>() && cudf::is_numeric())>* = nullptr> __device__ inline TargetT operator()(DeviceT const element) { auto const fp = SourceT{numeric::scaled_integer{element, scale}}; return static_cast(fp); } - template () && - cudf::is_fixed_point())>* = nullptr> + template < + typename SourceT = _SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_numeric<_SourceT>() && cudf::is_fixed_point())>* = nullptr> __device__ inline DeviceT operator()(SourceT const element) { return TargetT{element, scale}.value(); @@ -169,7 +169,7 @@ struct device_cast { * * @return std::unique_ptr Returned column with new @p scale */ -template ()>* = nullptr> +template ()>* = nullptr> std::unique_ptr rescale(column_view input, numeric::scale_type scale, rmm::cuda_stream_view stream, @@ -207,10 +207,9 @@ struct dispatch_unary_cast_to { dispatch_unary_cast_to(column_view inp) : input(inp) {} - 
template < - typename TargetT, - typename SourceT = _SourceT, - typename std::enable_if_t()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -234,10 +233,10 @@ struct dispatch_unary_cast_to { return output; } - template () && - cudf::is_numeric()>* = nullptr> + template < + typename TargetT, + typename SourceT = _SourceT, + std::enable_if_t() && cudf::is_numeric()>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -264,10 +263,10 @@ struct dispatch_unary_cast_to { return output; } - template () && - cudf::is_fixed_point()>* = nullptr> + template < + typename TargetT, + typename SourceT = _SourceT, + std::enable_if_t() && cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -294,11 +293,10 @@ struct dispatch_unary_cast_to { return output; } - template < - typename TargetT, - typename SourceT = _SourceT, - typename std::enable_if_t() && cudf::is_fixed_point() && - std::is_same_v>* = nullptr> + template () && cudf::is_fixed_point() && + std::is_same_v>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -310,11 +308,10 @@ struct dispatch_unary_cast_to { return detail::rescale(input, numeric::scale_type{type.scale()}, stream, mr); } - template < - typename TargetT, - typename SourceT = _SourceT, - typename std::enable_if_t() && cudf::is_fixed_point() && - not std::is_same_v>* = nullptr> + template () && cudf::is_fixed_point() && + not std::is_same_v>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -356,8 +353,8 @@ struct dispatch_unary_cast_to { } template ()>* = nullptr> + typename SourceT = _SourceT, + std::enable_if_t()>* = nullptr> std::unique_ptr operator()(data_type, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) @@ -379,7 +376,7 @@ struct dispatch_unary_cast_from { dispatch_unary_cast_from(column_view inp) : input(inp) {} - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu index 474c7b76ddc..e92d5a1ca7e 100644 --- a/cpp/src/unary/math_ops.cu +++ b/cpp/src/unary/math_ops.cu @@ -348,7 +348,7 @@ std::unique_ptr transform_fn(cudf::dictionary_column_view const& i template struct MathOpDispatcher { - template >* = nullptr> + template >* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -362,7 +362,7 @@ struct MathOpDispatcher { } struct dictionary_dispatch { - template >* = nullptr> + template >* = nullptr> std::unique_ptr operator()(cudf::dictionary_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -377,9 +377,9 @@ struct MathOpDispatcher { } }; - template and - std::is_same_v>* = nullptr> + template < + typename T, + std::enable_if_t and std::is_same_v>* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -401,7 +401,7 @@ struct MathOpDispatcher { template struct BitwiseOpDispatcher { - template >* = nullptr> + template >* = nullptr> std::unique_ptr 
operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -415,7 +415,7 @@ struct BitwiseOpDispatcher { } struct dictionary_dispatch { - template >* = nullptr> + template >* = nullptr> std::unique_ptr operator()(cudf::dictionary_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -431,8 +431,7 @@ struct BitwiseOpDispatcher { }; template and std::is_same_v>* = - nullptr> + std::enable_if_t and std::is_same_v>* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -462,7 +461,7 @@ struct LogicalOpDispatcher { } public: - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -477,7 +476,7 @@ struct LogicalOpDispatcher { } struct dictionary_dispatch { - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(cudf::dictionary_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -499,9 +498,8 @@ struct LogicalOpDispatcher { } }; - template < - typename T, - typename std::enable_if_t() and std::is_same_v>* = nullptr> + template () and std::is_same_v>* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) diff --git a/cpp/tests/binaryop/binop-fixture.hpp b/cpp/tests/binaryop/binop-fixture.hpp index 65243b1ae2e..2ba5561826e 100644 --- a/cpp/tests/binaryop/binop-fixture.hpp +++ b/cpp/tests/binaryop/binop-fixture.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. * Copyright 2018 Christian Noboa Mardini @@ -61,14 +61,14 @@ struct BinaryOperationTest : public cudf::test::BaseFixture { return cudf::test::fixed_width_column_wrapper(data_iter, data_iter + size, validity_iter); } - template >* = nullptr> + template >* = nullptr> auto make_random_wrapped_scalar() { cudf::test::UniformRandomGenerator rand_gen(r_min, r_max); return cudf::scalar_type_t(rand_gen.generate()); } - template >* = nullptr> + template >* = nullptr> auto make_random_wrapped_scalar() { cudf::test::UniformRandomGenerator rand_gen(r_min, r_max); diff --git a/cpp/tests/binaryop/util/operation.h b/cpp/tests/binaryop/util/operation.h index d78ad8938c4..93a84a7bc49 100644 --- a/cpp/tests/binaryop/util/operation.h +++ b/cpp/tests/binaryop/util/operation.h @@ -32,19 +32,19 @@ template struct Add { // Allow sum between chronos only when both input and output types // are chronos. Unsupported combinations will fail to compile - template () && cudf::is_chrono() && - cudf::is_chrono(), - void>::type* = nullptr> + template () && cudf::is_chrono() && + cudf::is_chrono(), + void>* = nullptr> OutT operator()(TypeLhs lhs, TypeRhs rhs) const { return lhs + rhs; } - template () || !cudf::is_chrono() || - !cudf::is_chrono(), - void>::type* = nullptr> + template () || !cudf::is_chrono() || + !cudf::is_chrono(), + void>* = nullptr> OutT operator()(TypeLhs lhs, TypeRhs rhs) const { using TypeCommon = typename std::common_type::type; @@ -56,19 +56,19 @@ template struct Sub { // Allow difference between chronos only when both input and output types // are chronos. 
Unsupported combinations will fail to compile - template () && cudf::is_chrono() && - cudf::is_chrono(), - void>::type* = nullptr> + template () && cudf::is_chrono() && + cudf::is_chrono(), + void>* = nullptr> OutT operator()(TypeLhs lhs, TypeRhs rhs) const { return lhs - rhs; } - template () || !cudf::is_chrono() || - !cudf::is_chrono(), - void>::type* = nullptr> + template () || !cudf::is_chrono() || + !cudf::is_chrono(), + void>* = nullptr> OutT operator()(TypeLhs lhs, TypeRhs rhs) const { using TypeCommon = typename std::common_type::type; @@ -78,28 +78,27 @@ struct Sub { template struct Mul { - template ::value, void>::type* = nullptr> + template ::value, void>* = nullptr> TypeOut operator()(TypeLhs lhs, TypeRhs rhs) const { using TypeCommon = typename std::common_type::type; return static_cast(static_cast(lhs) * static_cast(rhs)); } - template ::value, void>::type* = nullptr> + template ::value, void>* = nullptr> TypeOut operator()(TypeLhs x, TypeRhs y) const { return DurationProduct(x, y); } - template < - typename OutT, - typename LhsT, - typename RhsT, - typename std::enable_if<(cudf::is_duration_t::value && std::is_integral_v) || - (cudf::is_duration_t::value && std::is_integral_v), - void>::type* = nullptr> + template ::value && std::is_integral_v) || + (cudf::is_duration_t::value && std::is_integral_v), + void>* = nullptr> OutT DurationProduct(LhsT x, RhsT y) const { return x * y; @@ -108,26 +107,26 @@ struct Mul { template struct Div { - template ::value, void>::type* = nullptr> + template ::value, void>* = nullptr> TypeOut operator()(TypeLhs lhs, TypeRhs rhs) { using TypeCommon = typename std::common_type::type; return static_cast(static_cast(lhs) / static_cast(rhs)); } - template ::value, void>::type* = nullptr> + template ::value, void>* = nullptr> TypeOut operator()(TypeLhs x, TypeRhs y) const { return DurationDivide(x, y); } - template || cudf::is_duration()), - void>::type* = nullptr> + template < + typename OutT, + typename LhsT, + typename RhsT, + std::enable_if_t<(std::is_integral_v || cudf::is_duration()), void>* = nullptr> OutT DurationDivide(LhsT x, RhsT y) const { return x / y; @@ -185,10 +184,10 @@ struct Mod { } // Mod with duration types - duration % (integral or a duration) = duration - template ::value && - cudf::is_duration_t::value>* = nullptr> + template ::value && + cudf::is_duration_t::value>* = nullptr> TypeOut operator()(TypeLhs lhs, TypeRhs rhs) { return lhs % rhs; diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index a306736d131..ec7fae58f98 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -340,6 +340,22 @@ struct OverflowTest : public cudf::test::BaseFixture { TEST_F(OverflowTest, OverflowTest) { using namespace cudf; + // should concatenate up to size_type::max rows. 
+ { + // 5 x size + size_last adds to size_type::max + constexpr auto size = static_cast(static_cast(250) * 1024 * 1024); + constexpr auto size_last = static_cast(836763647); + + auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, size); + auto many_chars_last = cudf::make_fixed_width_column(data_type{type_id::INT8}, size_last); + + table_view tbl({*many_chars}); + table_view tbl_last({*many_chars_last}); + std::vector table_views_to_concat({tbl, tbl, tbl, tbl, tbl, tbl_last}); + std::unique_ptr concatenated_tables = cudf::concatenate(table_views_to_concat); + EXPECT_NO_THROW(rmm::cuda_stream_default.synchronize()); + ASSERT_EQ(concatenated_tables->num_rows(), std::numeric_limits::max()); + } // primitive column { diff --git a/cpp/tests/copying/copy_tests.cpp b/cpp/tests/copying/copy_tests.cpp index 4254794bf19..62f1300c284 100644 --- a/cpp/tests/copying/copy_tests.cpp +++ b/cpp/tests/copying/copy_tests.cpp @@ -378,18 +378,16 @@ TYPED_TEST(CopyTestNumeric, CopyIfElseTestScalarScalar) template struct create_chrono_scalar { template - typename std::enable_if_t< - std::is_same_v::type, std::true_type>, - cudf::timestamp_scalar> + std::enable_if_t::type, std::true_type>, + cudf::timestamp_scalar> operator()(Args&&... args) const { return cudf::timestamp_scalar(std::forward(args)...); } template - typename std::enable_if_t< - std::is_same_v::type, std::true_type>, - cudf::duration_scalar> + std::enable_if_t::type, std::true_type>, + cudf::duration_scalar> operator()(Args&&... args) const { return cudf::duration_scalar(std::forward(args)...); diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu index fd065249c4e..31174d3fd72 100644 --- a/cpp/tests/device_atomics/device_atomics_test.cu +++ b/cpp/tests/device_atomics/device_atomics_test.cu @@ -51,7 +51,7 @@ constexpr inline bool is_timestamp_sum() // Disable SUM of TIMESTAMP types template ()>* = nullptr> + std::enable_if_t()>* = nullptr> __device__ T atomic_op(T* addr, T const& value, BinaryOp op) { return {}; @@ -59,7 +59,7 @@ __device__ T atomic_op(T* addr, T const& value, BinaryOp op) template ()>* = nullptr> + std::enable_if_t()>* = nullptr> __device__ T atomic_op(T* addr, T const& value, BinaryOp op) { T old_value = *addr; @@ -92,13 +92,13 @@ __global__ void gpu_atomicCAS_test(T* result, T* data, size_t size) } template -typename std::enable_if_t(), T> accumulate(cudf::host_span xs) +std::enable_if_t(), T> accumulate(cudf::host_span xs) { return std::accumulate(xs.begin(), xs.end(), T{0}); } template -typename std::enable_if_t(), T> accumulate(cudf::host_span xs) +std::enable_if_t(), T> accumulate(cudf::host_span xs) { auto ys = std::vector(xs.size()); std::transform( diff --git a/cpp/tests/groupby/tdigest_tests.cu b/cpp/tests/groupby/tdigest_tests.cu index 2591f395914..b0ce22bae7c 100644 --- a/cpp/tests/groupby/tdigest_tests.cu +++ b/cpp/tests/groupby/tdigest_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
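The new overflow test above concatenates five INT8 columns of 250 * 1024 * 1024 rows plus one column of 836763647 rows, which lands exactly on the 32-bit size_type maximum. A quick compile-time check of that arithmetic (assuming, as in libcudf, that size_type is a signed 32-bit integer):

// Checks the row-count arithmetic used by the concatenate overflow test:
// 5 * (250 * 1024 * 1024) + 836'763'647 == numeric_limits<int32_t>::max().
#include <cstdint>
#include <limits>

int main()
{
  constexpr std::int64_t size      = std::int64_t{250} * 1024 * 1024;  // 262'144'000 rows per column
  constexpr std::int64_t size_last = 836'763'647;
  static_assert(5 * size + size_last == std::numeric_limits<std::int32_t>::max(),
                "five full columns plus the last column must sum to size_type::max");
  return 0;
}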
@@ -60,9 +60,8 @@ struct column_max { }; struct tdigest_gen { - template < - typename T, - typename std::enable_if_t() || cudf::is_fixed_point()>* = nullptr> + template () || cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta) { cudf::table_view t({keys}); @@ -75,9 +74,8 @@ struct tdigest_gen { return std::move(result.second[0].results[0]); } - template < - typename T, - typename std::enable_if_t() && !cudf::is_fixed_point()>* = nullptr> + template () && !cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta) { CUDF_FAIL("Invalid tdigest test type"); diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 08cdbb10359..e5e44b1aa6e 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -262,7 +262,7 @@ void check_string_column(cudf::column_view const& col_lhs, } // Helper function to compare two floating-point column contents -template >* = nullptr> +template >* = nullptr> void expect_column_data_equal(std::vector const& lhs, cudf::column_view const& rhs) { EXPECT_THAT(cudf::test::to_host(rhs).first, @@ -270,7 +270,7 @@ void expect_column_data_equal(std::vector const& lhs, cudf::column_view const } // Helper function to compare two column contents -template >* = nullptr> +template >* = nullptr> void expect_column_data_equal(std::vector const& lhs, cudf::column_view const& rhs) { EXPECT_THAT(cudf::test::to_host(rhs).first, ::testing::ElementsAreArray(lhs)); diff --git a/cpp/tests/io/text/multibyte_split_test.cpp b/cpp/tests/io/text/multibyte_split_test.cpp index 27a8be95e9b..cfd1a16f19a 100644 --- a/cpp/tests/io/text/multibyte_split_test.cpp +++ b/cpp/tests/io/text/multibyte_split_test.cpp @@ -21,6 +21,8 @@ #include #include +#include +#include #include #include #include @@ -142,4 +144,29 @@ TEST_F(MultibyteSplitTest, HandpickedInput) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out, debug_output_level::ALL_ERRORS); } +TEST_F(MultibyteSplitTest, LargeInputMultipleRange) +{ + auto host_input = std::string(); + auto host_expected = std::vector(); + + for (auto i = 0; i < 1000; i++) { + host_input += "...:|"; + } + + auto delimiter = std::string("...:|"); + auto source = cudf::io::text::make_source(host_input); + + auto byte_ranges = cudf::io::text::create_byte_range_infos_consecutive(host_input.size(), 3); + auto out0 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[0]); + auto out1 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[1]); + auto out2 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[2]); + + auto out_views = std::vector({out0->view(), out1->view(), out2->view()}); + auto out = cudf::concatenate(out_views); + + auto expected = cudf::io::text::multibyte_split(*source, delimiter); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected->view(), *out, debug_output_level::ALL_ERRORS); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/quantiles/percentile_approx_test.cu b/cpp/tests/quantiles/percentile_approx_test.cu index 2f4d5a7a604..035cd664aa2 100644 --- a/cpp/tests/quantiles/percentile_approx_test.cu +++ b/cpp/tests/quantiles/percentile_approx_test.cu @@ -1,3 +1,18 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include #include @@ -21,9 +36,8 @@ using namespace cudf; using namespace cudf::tdigest; struct tdigest_gen { - template < - typename T, - typename std::enable_if_t() || cudf::is_fixed_point()>* = nullptr> + template () || cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta) { cudf::table_view t({keys}); @@ -36,9 +50,8 @@ struct tdigest_gen { return std::move(result.second[0].results[0]); } - template < - typename T, - typename std::enable_if_t() && !cudf::is_fixed_point()>* = nullptr> + template () && !cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta) { CUDF_FAIL("Invalid tdigest test type"); @@ -89,9 +102,8 @@ std::unique_ptr arrow_percentile_approx(column_view const& _values, } struct percentile_approx_dispatch { - template < - typename T, - typename std::enable_if_t() || cudf::is_fixed_point()>* = nullptr> + template () || cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta, @@ -127,9 +139,8 @@ struct percentile_approx_dispatch { return result; } - template < - typename T, - typename std::enable_if_t() && !cudf::is_fixed_point()>* = nullptr> + template () && !cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta, diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index fcecc124978..276b244dac6 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -38,7 +38,7 @@ using aggregation = cudf::aggregation; template -typename std::enable_if::value, std::vector>::type convert_values( +std::enable_if_t::value, std::vector> convert_values( std::vector const& int_values) { std::vector v(int_values.size()); @@ -50,7 +50,7 @@ typename std::enable_if::value, std::vector>::type c } template -typename std::enable_if::value, std::vector>::type convert_values( +std::enable_if_t::value, std::vector> convert_values( std::vector const& int_values) { std::vector v(int_values.size()); diff --git a/cpp/tests/reductions/scan_tests.hpp b/cpp/tests/reductions/scan_tests.hpp index 346103de85b..858697d8ef5 100644 --- a/cpp/tests/reductions/scan_tests.hpp +++ b/cpp/tests/reductions/scan_tests.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
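The test cleanups in this change consistently replace the pre-C++14 spelling typename std::enable_if<cond, T>::type with the alias std::enable_if_t<cond, T>, as in convert_values and make_vector above. A short sketch showing that the two spellings name the same type; the helper names here are hypothetical:

// Old and new enable_if spellings are interchangeable; the alias is just terser.
#include <type_traits>
#include <vector>

// pre-C++14 spelling
template <typename T>
typename std::enable_if<std::is_integral<T>::value, std::vector<T>>::type
make_values_old(std::size_t n)
{
  return std::vector<T>(n, T{1});
}

// C++14 alias-template spelling used throughout this change
template <typename T>
std::enable_if_t<std::is_integral_v<T>, std::vector<T>>
make_values_new(std::size_t n)
{
  return std::vector<T>(n, T{1});
}

static_assert(std::is_same_v<typename std::enable_if<true, int>::type, std::enable_if_t<true, int>>,
              "the alias names exactly the same type");

int main()
{
  auto a = make_values_old<int>(3);
  auto b = make_values_new<long>(3);
  return static_cast<int>(a.size() + b.size()) - 6;  // 0 on success
}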
@@ -58,25 +58,23 @@ struct TypeParam_to_host_type { }; template -typename std::enable_if, - thrust::host_vector>::type +std::enable_if_t, thrust::host_vector> make_vector(std::initializer_list const& init) { return cudf::test::make_type_param_vector(init); } template -typename std::enable_if(), - thrust::host_vector>::type +std::enable_if_t(), thrust::host_vector> make_vector(std::initializer_list const& init) { return cudf::test::make_type_param_vector(init); } template -typename std::enable_if || - cudf::is_fixed_point()), - thrust::host_vector>::type +std::enable_if_t || + cudf::is_fixed_point()), + thrust::host_vector> make_vector(std::initializer_list const& init) { return cudf::test::make_type_param_vector(init); diff --git a/cpp/tests/sort/is_sorted_tests.cpp b/cpp/tests/sort/is_sorted_tests.cpp index 7d277059ef7..44fa83204ee 100644 --- a/cpp/tests/sort/is_sorted_tests.cpp +++ b/cpp/tests/sort/is_sorted_tests.cpp @@ -36,8 +36,7 @@ namespace testdata { // ----- most numerics template -typename std::enable_if && !std::is_same_v, - fixed_width_column_wrapper>::type +std::enable_if_t && !std::is_same_v, fixed_width_column_wrapper> ascending() { return std::is_signed_v ? fixed_width_column_wrapper({std::numeric_limits::lowest(), @@ -58,8 +57,7 @@ ascending() } template -typename std::enable_if && !std::is_same_v, - fixed_width_column_wrapper>::type +std::enable_if_t && !std::is_same_v, fixed_width_column_wrapper> descending() { return std::is_signed_v ? fixed_width_column_wrapper({std::numeric_limits::max(), @@ -100,14 +98,13 @@ auto nulls_before() // ----- bool template -typename std::enable_if, fixed_width_column_wrapper>::type ascending() +std::enable_if_t, fixed_width_column_wrapper> ascending() { return fixed_width_column_wrapper({false, false, true, true}); } template -typename std::enable_if, fixed_width_column_wrapper>::type -descending() +std::enable_if_t, fixed_width_column_wrapper> descending() { return fixed_width_column_wrapper({true, true, false, false}); } @@ -115,13 +112,13 @@ descending() // ----- chrono types template -typename std::enable_if(), fixed_width_column_wrapper>::type ascending() +std::enable_if_t(), fixed_width_column_wrapper> ascending() { return fixed_width_column_wrapper({T::min(), T::max()}); } template -typename std::enable_if(), fixed_width_column_wrapper>::type descending() +std::enable_if_t(), fixed_width_column_wrapper> descending() { return fixed_width_column_wrapper({T::max(), T::min()}); } @@ -129,15 +126,13 @@ typename std::enable_if(), fixed_width_column_wrapper>::ty // ----- string_view template -typename std::enable_if, strings_column_wrapper>::type -ascending() +std::enable_if_t, strings_column_wrapper> ascending() { return strings_column_wrapper({"A", "B"}); } template -typename std::enable_if, strings_column_wrapper>::type -descending() +std::enable_if_t, strings_column_wrapper> descending() { return strings_column_wrapper({"B", "A"}); } @@ -163,8 +158,7 @@ auto nulls_before() // ----- struct_view {"nestedInt" : {"Int" : 0 }, "float" : 1} template -typename std::enable_if, structs_column_wrapper>::type -ascending() +std::enable_if_t, structs_column_wrapper> ascending() { using T1 = int32_t; auto int_col = fixed_width_column_wrapper({std::numeric_limits::lowest(), @@ -182,8 +176,7 @@ ascending() } template -typename std::enable_if, structs_column_wrapper>::type -descending() +std::enable_if_t, structs_column_wrapper> descending() { using T1 = int32_t; auto int_col = fixed_width_column_wrapper({std::numeric_limits::max(), diff --git 
a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 5403d56318e..9daf70227f8 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -836,13 +836,13 @@ std::vector bitmask_to_host(cudf::column_view const& c) namespace { -template >* = nullptr> +template >* = nullptr> static auto numeric_to_string_precise(T value) { return std::to_string(value); } -template >* = nullptr> +template >* = nullptr> static auto numeric_to_string_precise(T value) { std::ostringstream o; @@ -915,7 +915,7 @@ std::string nested_offsets_to_string(NestedColumnView const& c, std::string cons } struct column_view_printer { - template ()>* = nullptr> + template ()>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const&) { auto h_data = cudf::test::to_host(col); @@ -939,7 +939,7 @@ struct column_view_printer { } } - template ()>* = nullptr> + template ()>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const& indent) @@ -965,7 +965,7 @@ struct column_view_printer { this->template operator()(*col_as_strings, out, indent); } - template ()>* = nullptr> + template ()>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const&) { auto const h_data = cudf::test::to_host(col); @@ -987,7 +987,7 @@ struct column_view_printer { } template >* = nullptr> + std::enable_if_t>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const&) { // @@ -1008,7 +1008,7 @@ struct column_view_printer { } template >* = nullptr> + std::enable_if_t>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const&) { cudf::dictionary_column_view dictionary(col); @@ -1029,7 +1029,7 @@ struct column_view_printer { } // Print the tick counts with the units - template ()>* = nullptr> + template ()>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const&) { auto h_data = cudf::test::to_host(col); @@ -1054,8 +1054,7 @@ struct column_view_printer { } } - template >* = nullptr> + template >* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const& indent) @@ -1084,7 +1083,7 @@ struct column_view_printer { } template >* = nullptr> + std::enable_if_t>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const& indent) diff --git a/cpp/tests/wrappers/timestamps_test.cu b/cpp/tests/wrappers/timestamps_test.cu index 097b786aefe..48500c84942 100644 --- a/cpp/tests/wrappers/timestamps_test.cu +++ b/cpp/tests/wrappers/timestamps_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
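The column_view_printer overloads touched below rely on the std::enable_if_t<condition>* = nullptr idiom: each operator() is dropped from overload resolution unless its condition holds, so exactly one overload survives for a given element type. A standalone sketch of that selection mechanism, with a made-up value_printer in place of the cudf utility:

// Sketch of SFINAE via a defaulted non-type template parameter: when the
// condition is false, enable_if_t has no type and the overload is discarded.
#include <iostream>
#include <string>
#include <type_traits>

struct value_printer {
  template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
  void operator()(T v) const
  {
    std::cout << "integral: " << v << "\n";
  }

  template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
  void operator()(T v) const
  {
    std::cout << "floating point: " << v << "\n";
  }

  template <typename T, std::enable_if_t<std::is_same_v<T, std::string>>* = nullptr>
  void operator()(T const& v) const
  {
    std::cout << "string: " << v << "\n";
  }
};

int main()
{
  value_printer p;
  p(42);                    // selects the integral overload
  p(3.5);                   // selects the floating-point overload
  p(std::string{"hello"});  // selects the string overload
  return 0;
}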
@@ -50,7 +50,7 @@ struct compare_chrono_elements_to_primitive_representation { { } - template ()>* = nullptr> + template ()>* = nullptr> __host__ __device__ bool operator()(const int32_t element_index) { using Primitive = typename ChronoT::rep; @@ -59,7 +59,7 @@ struct compare_chrono_elements_to_primitive_representation { return primitive == timestamp.time_since_epoch().count(); } - template ()>* = nullptr> + template ()>* = nullptr> __host__ __device__ bool operator()(const int32_t element_index) { using Primitive = typename ChronoT::rep; diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 2de55553c3f..7a7c9c195b2 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -209,8 +209,8 @@ Reshaping, sorting, transposing DataFrame.T DataFrame.transpose -Combining / comparing / joining / merging / encoding -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Combining / comparing / joining / merging +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ @@ -219,8 +219,6 @@ Combining / comparing / joining / merging / encoding DataFrame.join DataFrame.merge DataFrame.update - DataFrame.label_encoding - DataFrame.one_hot_encoding Numerical operations ~~~~~~~~~~~~~~~~~~~~ @@ -249,8 +247,6 @@ Serialization / IO / conversion .. autosummary:: :toctree: api/ - DataFrame.as_gpu_matrix - DataFrame.as_matrix DataFrame.from_arrow DataFrame.from_pandas DataFrame.from_records diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index d705504cc0c..b7b358e38be 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -34,7 +34,7 @@ Properties Index.shape Index.size Index.values - + Modifying and computations ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -94,7 +94,6 @@ Conversion Index.astype Index.to_arrow Index.to_list - Index.to_numpy Index.to_series Index.to_frame Index.to_pandas diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index cf5dd4a2a1d..376acf1694b 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -44,7 +44,6 @@ Conversion Series.copy Series.to_list Series.__array__ - Series.as_mask Series.scale @@ -172,9 +171,7 @@ Reindexing / selection / label manipulation Series.reindex Series.rename Series.reset_index - Series.reverse Series.sample - Series.set_mask Series.take Series.tail Series.tile @@ -210,15 +207,13 @@ Reshaping, sorting Series.repeat Series.transpose -Combining / comparing / joining / merging / encoding ----------------------------------------------------- +Combining / comparing / joining / merging +----------------------------------------- .. autosummary:: :toctree: api/ Series.append Series.update - Series.label_encoding - Series.one_hot_encoding Numerical operations ~~~~~~~~~~~~~~~~~~~~ @@ -409,12 +404,10 @@ Serialization / IO / conversion :toctree: api/ Series.to_arrow - Series.to_cupy Series.to_dlpack Series.to_frame Series.to_hdf Series.to_json - Series.to_numpy Series.to_pandas Series.to_string Series.from_arrow diff --git a/python/cudf/cudf/_lib/cpp/io/text.pxd b/python/cudf/cudf/_lib/cpp/io/text.pxd index 9ce0c68cb08..5b110d6234c 100644 --- a/python/cudf/cudf/_lib/cpp/io/text.pxd +++ b/python/cudf/cudf/_lib/cpp/io/text.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -6,6 +6,13 @@ from libcpp.string cimport string from cudf._lib.cpp.column.column cimport column +cdef extern from "cudf/io/text/byte_range_info.hpp" \ + namespace "cudf::io::text" nogil: + + cdef cppclass byte_range_info: + byte_range_info() except + + byte_range_info(size_t offset, size_t size) except + + cdef extern from "cudf/io/text/data_chunk_source.hpp" \ namespace "cudf::io::text" nogil: @@ -25,3 +32,7 @@ cdef extern from "cudf/io/text/multibyte_split.hpp" \ unique_ptr[column] multibyte_split(data_chunk_source source, string delimiter) except + + + unique_ptr[column] multibyte_split(data_chunk_source source, + string delimiter, + byte_range_info byte_range) except + diff --git a/python/cudf/cudf/_lib/text.pyx b/python/cudf/cudf/_lib/text.pyx index 9f33f32bdaf..daea227cc39 100644 --- a/python/cudf/cudf/_lib/text.pyx +++ b/python/cudf/cudf/_lib/text.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import cudf @@ -10,6 +10,7 @@ from libcpp.utility cimport move from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.io.text cimport ( + byte_range_info, data_chunk_source, make_source, make_source_from_file, @@ -18,7 +19,8 @@ from cudf._lib.cpp.io.text cimport ( def read_text(object filepaths_or_buffers, - object delimiter=None): + object delimiter=None, + object byte_range=None): """ Cython function to call into libcudf API, see `multibyte_split`. @@ -31,9 +33,25 @@ def read_text(object filepaths_or_buffers, cdef unique_ptr[data_chunk_source] datasource cdef unique_ptr[column] c_col - - with nogil: - datasource = move(make_source_from_file(filename)) - c_col = move(multibyte_split(dereference(datasource), delim)) + cdef size_t c_byte_range_offset + cdef size_t c_byte_range_size + cdef byte_range_info c_byte_range + + if (byte_range is not None): + c_byte_range_offset = byte_range[0] + c_byte_range_size = byte_range[1] + with nogil: + datasource = move(make_source_from_file(filename)) + c_byte_range = byte_range_info( + c_byte_range_offset, + c_byte_range_size) + c_col = move(multibyte_split( + dereference(datasource), + delim, + c_byte_range)) + else: + with nogil: + datasource = move(make_source_from_file(filename)) + c_col = move(multibyte_split(dereference(datasource), delim)) return {None: Column.from_unique_ptr(move(c_col))} diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index d5edbd7284e..510bf0208b7 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -46,7 +46,7 @@ ) from cudf.core.column_accessor import ColumnAccessor from cudf.core.join import Merge, MergeSemi -from cudf.core.mixins import Scannable +from cudf.core.mixins import BinaryOperand, Scannable from cudf.core.window import Rolling from cudf.utils import ioutils from cudf.utils.docutils import copy_docstring @@ -98,7 +98,7 @@ } -class Frame(Scannable): +class Frame(BinaryOperand, Scannable): """A collection of Column objects with an optional index. 
Parameters @@ -115,6 +115,8 @@ class Frame(Scannable): _index: Optional[cudf.core.index.BaseIndex] _names: Optional[List] + _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS + _VALID_SCANS = { "cumsum", "cumprod", @@ -3571,13 +3573,7 @@ def _unaryop(self, op): ) def _binaryop( - self, - other: T, - fn: str, - fill_value: Any = None, - reflect: bool = False, - *args, - **kwargs, + self, other: T, op: str, fill_value: Any = None, *args, **kwargs, ) -> Frame: """Perform a binary operation between two frames. @@ -3585,15 +3581,11 @@ def _binaryop( ---------- other : Frame The second operand. - fn : str + op : str The operation to perform. fill_value : Any, default None The value to replace null values with. If ``None``, nulls are not filled before the operation. - reflect : bool, default False - If ``True``, swap the order of the operands. See - https://docs.python.org/3/reference/datamodel.html#object.__ror__ - for more information on when this is necessary. Returns ------- @@ -3633,6 +3625,7 @@ def _colwise_binop( A dict of columns constructed from the result of performing the requested operation on the operands. """ + fn = fn[2:-2] # Now actually perform the binop on the columns in left and right. output = {} @@ -3915,83 +3908,12 @@ def dot(self, other, reflect=False): return cudf.DataFrame(result) return result.item() - # Binary arithmetic operations. - def __add__(self, other): - return self._binaryop(other, "add") - - def __radd__(self, other): - return self._binaryop(other, "add", reflect=True) - - def __sub__(self, other): - return self._binaryop(other, "sub") - - def __rsub__(self, other): - return self._binaryop(other, "sub", reflect=True) - def __matmul__(self, other): return self.dot(other) def __rmatmul__(self, other): return self.dot(other, reflect=True) - def __mul__(self, other): - return self._binaryop(other, "mul") - - def __rmul__(self, other): - return self._binaryop(other, "mul", reflect=True) - - def __mod__(self, other): - return self._binaryop(other, "mod") - - def __rmod__(self, other): - return self._binaryop(other, "mod", reflect=True) - - def __pow__(self, other): - return self._binaryop(other, "pow") - - def __rpow__(self, other): - return self._binaryop(other, "pow", reflect=True) - - def __floordiv__(self, other): - return self._binaryop(other, "floordiv") - - def __rfloordiv__(self, other): - return self._binaryop(other, "floordiv", reflect=True) - - def __truediv__(self, other): - return self._binaryop(other, "truediv") - - def __rtruediv__(self, other): - return self._binaryop(other, "truediv", reflect=True) - - def __and__(self, other): - return self._binaryop(other, "and") - - def __or__(self, other): - return self._binaryop(other, "or") - - def __xor__(self, other): - return self._binaryop(other, "xor") - - # Binary rich comparison operations. 
- def __eq__(self, other): - return self._binaryop(other, "eq") - - def __ne__(self, other): - return self._binaryop(other, "ne") - - def __lt__(self, other): - return self._binaryop(other, "lt") - - def __le__(self, other): - return self._binaryop(other, "le") - - def __gt__(self, other): - return self._binaryop(other, "gt") - - def __ge__(self, other): - return self._binaryop(other, "ge") - # Unary logical operators def __neg__(self): return -1 * self @@ -5062,7 +4984,7 @@ def add(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "add", fill_value) + return self._binaryop(other, "__add__", fill_value) @annotate("FRAME_RADD", color="green", domain="cudf_python") def radd(self, other, axis, level=None, fill_value=None): @@ -5142,7 +5064,7 @@ def radd(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "add", fill_value, reflect=True) + return self._binaryop(other, "__radd__", fill_value) @annotate("FRAME_SUBTRACT", color="green", domain="cudf_python") def subtract(self, other, axis, level=None, fill_value=None): @@ -5223,7 +5145,7 @@ def subtract(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "sub", fill_value) + return self._binaryop(other, "__sub__", fill_value) sub = subtract @@ -5309,7 +5231,7 @@ def rsub(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "sub", fill_value, reflect=True) + return self._binaryop(other, "__rsub__", fill_value) @annotate("FRAME_MULTIPLY", color="green", domain="cudf_python") def multiply(self, other, axis, level=None, fill_value=None): @@ -5392,7 +5314,7 @@ def multiply(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "mul", fill_value) + return self._binaryop(other, "__mul__", fill_value) mul = multiply @@ -5479,7 +5401,7 @@ def rmul(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "mul", fill_value, reflect=True) + return self._binaryop(other, "__rmul__", fill_value) @annotate("FRAME_MOD", color="green", domain="cudf_python") def mod(self, other, axis, level=None, fill_value=None): @@ -5550,7 +5472,7 @@ def mod(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "mod", fill_value) + return self._binaryop(other, "__mod__", fill_value) @annotate("FRAME_RMOD", color="green", domain="cudf_python") def rmod(self, other, axis, level=None, fill_value=None): @@ -5633,7 +5555,7 @@ def rmod(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "mod", fill_value, reflect=True) + return self._binaryop(other, "__rmod__", fill_value) @annotate("FRAME_POW", color="green", domain="cudf_python") def pow(self, other, axis, level=None, fill_value=None): @@ -5713,7 +5635,7 @@ def pow(self, other, axis, level=None, fill_value=None): if level is 
not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "pow", fill_value) + return self._binaryop(other, "__pow__", fill_value) @annotate("FRAME_RPOW", color="green", domain="cudf_python") def rpow(self, other, axis, level=None, fill_value=None): @@ -5793,7 +5715,7 @@ def rpow(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "pow", fill_value, reflect=True) + return self._binaryop(other, "__rpow__", fill_value) @annotate("FRAME_FLOORDIV", color="green", domain="cudf_python") def floordiv(self, other, axis, level=None, fill_value=None): @@ -5873,7 +5795,7 @@ def floordiv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "floordiv", fill_value) + return self._binaryop(other, "__floordiv__", fill_value) @annotate("FRAME_RFLOORDIV", color="green", domain="cudf_python") def rfloordiv(self, other, axis, level=None, fill_value=None): @@ -5970,7 +5892,7 @@ def rfloordiv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "floordiv", fill_value, reflect=True) + return self._binaryop(other, "__rfloordiv__", fill_value) @annotate("FRAME_TRUEDIV", color="green", domain="cudf_python") def truediv(self, other, axis, level=None, fill_value=None): @@ -6055,7 +5977,7 @@ def truediv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "truediv", fill_value) + return self._binaryop(other, "__truediv__", fill_value) # Alias for truediv div = truediv @@ -6149,7 +6071,7 @@ def rtruediv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "truediv", fill_value, reflect=True) + return self._binaryop(other, "__rtruediv__", fill_value) # Alias for rtruediv rdiv = rtruediv @@ -6227,7 +6149,7 @@ def eq(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ return self._binaryop( - other=other, fn="eq", fill_value=fill_value, can_reindex=True + other=other, op="__eq__", fill_value=fill_value, can_reindex=True ) @annotate("FRAME_NE", color="green", domain="cudf_python") @@ -6303,7 +6225,7 @@ def ne(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ # noqa: E501 return self._binaryop( - other=other, fn="ne", fill_value=fill_value, can_reindex=True + other=other, op="__ne__", fill_value=fill_value, can_reindex=True ) @annotate("FRAME_LT", color="green", domain="cudf_python") @@ -6379,7 +6301,7 @@ def lt(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ # noqa: E501 return self._binaryop( - other=other, fn="lt", fill_value=fill_value, can_reindex=True + other=other, op="__lt__", fill_value=fill_value, can_reindex=True ) @annotate("FRAME_LE", color="green", domain="cudf_python") @@ -6455,7 +6377,7 @@ def le(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ # noqa: E501 return self._binaryop( - other=other, fn="le", fill_value=fill_value, can_reindex=True + other=other, op="__le__", fill_value=fill_value, can_reindex=True ) @annotate("FRAME_GT", color="green", domain="cudf_python") @@ -6531,7 +6453,7 @@ 
def gt(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ # noqa: E501 return self._binaryop( - other=other, fn="gt", fill_value=fill_value, can_reindex=True + other=other, op="__gt__", fill_value=fill_value, can_reindex=True ) @annotate("FRAME_GE", color="green", domain="cudf_python") @@ -6607,7 +6529,7 @@ def ge(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ # noqa: E501 return self._binaryop( - other=other, fn="ge", fill_value=fill_value, can_reindex=True + other=other, op="__ge__", fill_value=fill_value, can_reindex=True ) def nunique(self, method: builtins.str = "sort", dropna: bool = True): diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 5aab834d452..343ba33ece1 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -52,6 +52,7 @@ from cudf.core.column.string import StringMethods as StringMethods from cudf.core.dtypes import IntervalDtype from cudf.core.frame import Frame +from cudf.core.mixins import BinaryOperand from cudf.core.single_column_frame import SingleColumnFrame from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import find_common_type @@ -122,7 +123,7 @@ def _index_from_columns( return _index_from_data(dict(zip(range(len(columns)), columns)), name=name) -class RangeIndex(BaseIndex): +class RangeIndex(BaseIndex, BinaryOperand): """ Immutable Index implementing a monotonic integer range. @@ -155,6 +156,8 @@ class RangeIndex(BaseIndex): RangeIndex(start=1, stop=10, step=1, name='a') """ + _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS + _range: range def __init__( @@ -698,43 +701,16 @@ def _apply_boolean_mask(self, boolean_mask): [self._values.apply_boolean_mask(boolean_mask)], [self.name] ) + def _binaryop(self, other, op: str): + return self._as_int64()._binaryop(other, op=op) + # Patch in all binops and unary ops, which bypass __getattr__ on the instance # and prevent the above overload from working. -for binop in ( - "__add__", - "__radd__", - "__sub__", - "__rsub__", - "__mod__", - "__rmod__", - "__pow__", - "__rpow__", - "__floordiv__", - "__rfloordiv__", - "__truediv__", - "__rtruediv__", - "__and__", - "__or__", - "__xor__", - "__eq__", - "__ne__", - "__lt__", - "__le__", - "__gt__", - "__ge__", -): - setattr( - RangeIndex, - binop, - lambda self, other, op=binop: getattr(self._as_int64(), op)(other), - ) - - for unaop in ("__neg__", "__pos__", "__abs__"): setattr( RangeIndex, - binop, + unaop, lambda self, op=unaop: getattr(self._as_int64(), op)(), ) @@ -814,19 +790,15 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return NotImplemented def _binaryop( - self, - other: T, - fn: str, - fill_value: Any = None, - reflect: bool = False, - *args, - **kwargs, + self, other: T, op: str, fill_value: Any = None, *args, **kwargs, ) -> SingleColumnFrame: - # Specialize binops to generate the appropriate output index type. + reflect = self._is_reflected_op(op) + if reflect: + op = op[:2] + op[3:] operands = self._make_operands_for_binop(other, fill_value, reflect) if operands is NotImplemented: return NotImplemented - ret = _index_from_data(self._colwise_binop(operands, fn)) + ret = _index_from_data(self._colwise_binop(operands, op)) # pandas returns numpy arrays when the outputs are boolean. 
We # explicitly _do not_ use isinstance here: we want only boolean diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 3ae0a838873..331457d17ae 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -1697,21 +1697,23 @@ def last(self, offset): def _binaryop( self, other: Any, - fn: str, + op: str, fill_value: Any = None, - reflect: bool = False, can_reindex: bool = False, *args, **kwargs, ): + reflect = self._is_reflected_op(op) + if reflect: + op = op[:2] + op[3:] operands, out_index = self._make_operands_and_index_for_binop( - other, fn, fill_value, reflect, can_reindex + other, op, fill_value, reflect, can_reindex ) if operands is NotImplemented: return NotImplemented return self._from_data( - ColumnAccessor(type(self)._colwise_binop(operands, fn)), + ColumnAccessor(type(self)._colwise_binop(operands, op)), index=out_index, ) diff --git a/python/cudf/cudf/core/mixins/__init__.py b/python/cudf/cudf/core/mixins/__init__.py index 507b3b18ac2..8306f3f11b3 100644 --- a/python/cudf/cudf/core/mixins/__init__.py +++ b/python/cudf/cudf/core/mixins/__init__.py @@ -1,6 +1,7 @@ # Copyright (c) 2022, NVIDIA CORPORATION. +from .binops import BinaryOperand from .reductions import Reducible from .scans import Scannable -__all__ = ["Reducible", "Scannable"] +__all__ = ["BinaryOperand", "Reducible", "Scannable"] diff --git a/python/cudf/cudf/core/mixins/binops.py b/python/cudf/cudf/core/mixins/binops.py new file mode 100644 index 00000000000..773b47b62b2 --- /dev/null +++ b/python/cudf/cudf/core/mixins/binops.py @@ -0,0 +1,56 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +from .mixin_factory import _create_delegating_mixin + +BinaryOperand = _create_delegating_mixin( + "BinaryOperand", + "Mixin encapsulating binary operations.", + "BINARY_OPERATION", + "_binaryop", + { + # Numeric operations. + "__add__", + "__sub__", + "__mul__", + "__matmul__", + "__truediv__", + "__floordiv__", + "__mod__", + # "__divmod__", # Not yet implemented + "__pow__", + # "__lshift__", # Not yet implemented + # "__rshift__", # Not yet implemented + "__and__", + "__xor__", + "__or__", + # Reflected numeric operations. + "__radd__", + "__rsub__", + "__rmul__", + "__rmatmul__", + "__rtruediv__", + "__rfloordiv__", + "__rmod__", + # "__rdivmod__", # Not yet implemented + "__rpow__", + # "__rlshift__", # Not yet implemented + # "__rrshift__", # Not yet implemented + "__rand__", + "__rxor__", + "__ror__", + # Rich comparison operations. + "__lt__", + "__le__", + "__eq__", + "__ne__", + "__gt__", + "__ge__", + }, +) + + +def _is_reflected_op(op): + return op[2] == "r" and op != "__rshift__" + + +BinaryOperand._is_reflected_op = staticmethod(_is_reflected_op) diff --git a/python/cudf/cudf/core/mixins/binops.pyi b/python/cudf/cudf/core/mixins/binops.pyi new file mode 100644 index 00000000000..45093cd04d4 --- /dev/null +++ b/python/cudf/cudf/core/mixins/binops.pyi @@ -0,0 +1,88 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +from typing import Set + +class BinaryOperand: + _SUPPORTED_BINARY_OPERATIONS: Set + + def __add__(self, other): + ... + + def __sub__(self, other): + ... + + def __mul__(self, other): + ... + + def __truediv__(self, other): + ... + + def __floordiv__(self, other): + ... + + def __mod__(self, other): + ... + + def __pow__(self, other): + ... + + def __and__(self, other): + ... + + def __xor__(self, other): + ... + + def __or__(self, other): + ... + + def __radd__(self, other): + ... + + def __rsub__(self, other): + ... 
+ + def __rmul__(self, other): + ... + + def __rtruediv__(self, other): + ... + + def __rfloordiv__(self, other): + ... + + def __rmod__(self, other): + ... + + def __rpow__(self, other): + ... + + def __rand__(self, other): + ... + + def __rxor__(self, other): + ... + + def __ror__(self, other): + ... + + def __lt__(self, other): + ... + + def __le__(self, other): + ... + + def __eq__(self, other): + ... + + def __ne__(self, other): + ... + + def __gt__(self, other): + ... + + def __ge__(self, other): + ... + + @staticmethod + def _is_reflected_op(op) -> bool: + ... diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py index ecb18f61830..7bbb299d643 100644 --- a/python/cudf/cudf/core/mixins/mixin_factory.py +++ b/python/cudf/cudf/core/mixins/mixin_factory.py @@ -86,14 +86,18 @@ def _should_define_operation(cls, operation, base_operation_name): # At this point we know that the class has the operation defined but it # also overrides the base operation. Since this function is called before # the operation is defined on the current class, we know that it inherited - # the operation from a parent. We therefore have two possibilities: + # the operation from a parent. We therefore have three possibilities: # 1. A parent class manually defined the operation. That override takes # precedence even if the current class defined the base operation. # 2. A parent class has an auto-generated operation, i.e. it is of type # Operation and was created by OperationMixin.__init_subclass__. The # current class must override it so that its base operation is used # rather than the parent's base operation. + # 3. The method is defined for all classes, i.e. it is a method of object. for base_cls in cls.__mro__: + # We always override methods defined for object. + if base_cls is object: + return True # The first attribute in the MRO is the one that will be used. if operation in base_cls.__dict__: return isinstance(base_cls.__dict__[operation], Operation) @@ -216,6 +220,7 @@ def __init_subclass__(cls): # Only add the valid set of operations for a particular class. valid_operations = set() for base_cls in cls.__mro__: + # Check for sentinel indicating that all operations are valid. valid_operations |= getattr(base_cls, validity_attr, set()) invalid_operations = valid_operations - supported_operations @@ -251,9 +256,8 @@ def _operation(self, op: str, *args, **kwargs): ) setattr(OperationMixin, base_operation_name, _operation) - # This attribute is set in case lookup is convenient at a later point, but - # it is not strictly necessary since `supported_operations` is part of the - # closure associated with the class's creation. + # Making this attribute available makes it easy for subclasses to indicate + # that all supported operations for this mixin are valid. setattr(OperationMixin, supported_attr, supported_operations) return OperationMixin diff --git a/python/cudf/cudf/core/mixins/reductions.pyi b/python/cudf/cudf/core/mixins/reductions.pyi index 600f30e9372..3769b7c360e 100644 --- a/python/cudf/cudf/core/mixins/reductions.pyi +++ b/python/cudf/cudf/core/mixins/reductions.pyi @@ -1,8 +1,10 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from __future__ import annotations +from typing import Set class Reducible: + _SUPPORTED_REDUCTIONS: Set + def sum(self): ... 
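
Note on the mixin changes above: the new BinaryOperand mixin generates the full set of binary dunder methods and routes each of them through a single _binaryop(self, other, op) hook, with _is_reflected_op letting implementations strip the reflection marker (op[:2] + op[3:]) before dispatching. A minimal, self-contained sketch of that delegation pattern follows; the names BinaryOperandSketch, MyNumber, and _make_op are illustrative only and this is not cudf's actual mixin_factory implementation.

# Simplified sketch of the delegation pattern used by the BinaryOperand mixin.
_BINARY_OPS = {"__add__", "__radd__", "__sub__", "__rsub__"}


def _make_op(op):
    # Each generated dunder forwards its own operator name to _binaryop.
    def _dunder(self, other):
        return self._binaryop(other, op)

    _dunder.__name__ = op
    return _dunder


class BinaryOperandSketch:
    _SUPPORTED_BINARY_OPERATIONS = _BINARY_OPS

    @staticmethod
    def _is_reflected_op(op):
        # Same rule as the patch: "__radd__" is reflected, "__rshift__" is not.
        return op[2] == "r" and op != "__rshift__"


for _op in _BINARY_OPS:
    setattr(BinaryOperandSketch, _op, _make_op(_op))


class MyNumber(BinaryOperandSketch):
    _VALID_BINARY_OPERATIONS = BinaryOperandSketch._SUPPORTED_BINARY_OPERATIONS

    def __init__(self, value):
        self.value = value

    def _binaryop(self, other, op):
        # Strip the reflection marker before dispatching, mirroring the
        # patched Frame._binaryop and IndexedFrame._binaryop.
        if self._is_reflected_op(op):
            op = op[:2] + op[3:]
            left, right = other, self.value
        else:
            left, right = self.value, other
        if op == "__add__":
            return MyNumber(left + right)
        if op == "__sub__":
            return MyNumber(left - right)
        return NotImplemented


print((MyNumber(3) + 4).value)   # 7, via the generated __add__
print((10 - MyNumber(3)).value)  # 7, via the generated __rsub__

The real mixin additionally filters the generated operators through each subclass's _VALID_BINARY_OPERATIONS, which is why Frame, RangeIndex, and Scalar in this patch set that attribute to BinaryOperand._SUPPORTED_BINARY_OPERATIONS.
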
diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index 134b94bf0f2..1c81803ed98 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -10,6 +10,7 @@ from cudf.core.column.column import ColumnBase from cudf.core.dtypes import ListDtype, StructDtype from cudf.core.index import BaseIndex +from cudf.core.mixins import BinaryOperand from cudf.core.series import Series from cudf.utils.dtypes import ( get_allowed_combinations_for_operator, @@ -17,7 +18,7 @@ ) -class Scalar: +class Scalar(BinaryOperand): """ A GPU-backed scalar object with NumPy scalar like properties May be used in binary operations against other scalars, cuDF @@ -57,6 +58,8 @@ class Scalar: The data type """ + _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS + def __init__(self, value, dtype=None): self._host_value = None @@ -211,69 +214,8 @@ def __float__(self): def __bool__(self): return bool(self.value) - # Scalar Binary Operations - def __add__(self, other): - return self._scalar_binop(other, "__add__") - - def __radd__(self, other): - return self._scalar_binop(other, "__radd__") - - def __sub__(self, other): - return self._scalar_binop(other, "__sub__") - - def __rsub__(self, other): - return self._scalar_binop(other, "__rsub__") - - def __mul__(self, other): - return self._scalar_binop(other, "__mul__") - - def __rmul__(self, other): - return self._scalar_binop(other, "__rmul__") - - def __truediv__(self, other): - return self._scalar_binop(other, "__truediv__") - - def __floordiv__(self, other): - return self._scalar_binop(other, "__floordiv__") - - def __rtruediv__(self, other): - return self._scalar_binop(other, "__rtruediv__") - - def __mod__(self, other): - return self._scalar_binop(other, "__mod__") - - def __divmod__(self, other): - return self._scalar_binop(other, "__divmod__") - - def __and__(self, other): - return self._scalar_binop(other, "__and__") - - def __xor__(self, other): - return self._scalar_binop(other, "__or__") - - def __pow__(self, other): - return self._scalar_binop(other, "__pow__") - - def __gt__(self, other): - return self._scalar_binop(other, "__gt__") - - def __lt__(self, other): - return self._scalar_binop(other, "__lt__") - - def __ge__(self, other): - return self._scalar_binop(other, "__ge__") - - def __le__(self, other): - return self._scalar_binop(other, "__le__") - - def __eq__(self, other): - return self._scalar_binop(other, "__eq__") - - def __ne__(self, other): - return self._scalar_binop(other, "__ne__") - def __round__(self, n): - return self._scalar_binop(n, "__round__") + return self._binaryop(n, "__round__") # Scalar Unary Operations def __abs__(self): @@ -330,7 +272,7 @@ def _binop_result_dtype_or_error(self, other, op): return cudf.dtype(out_dtype) - def _scalar_binop(self, other, op): + def _binaryop(self, other, op: str): if isinstance(other, (ColumnBase, Series, BaseIndex, np.ndarray)): # dispatch to column implementation return NotImplemented diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index ec87fcdb066..fffce27c89a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1245,21 +1245,21 @@ def logical_and(self, other): "Series.logical_and is deprecated and will be removed.", FutureWarning, ) - return self._binaryop(other, "l_and").astype(np.bool_) + return self._binaryop(other, "__l_and__").astype(np.bool_) def remainder(self, other): warnings.warn( "Series.remainder is deprecated and will be removed.", FutureWarning, ) - 
return self._binaryop(other, "mod") + return self._binaryop(other, "__mod__") def logical_or(self, other): warnings.warn( "Series.logical_or is deprecated and will be removed.", FutureWarning, ) - return self._binaryop(other, "l_or").astype(np.bool_) + return self._binaryop(other, "__l_or__").astype(np.bool_) def logical_not(self): warnings.warn( diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py index 705645b8349..04809f8fd59 100644 --- a/python/cudf/cudf/io/text.py +++ b/python/cudf/cudf/io/text.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. from io import BytesIO, StringIO @@ -12,7 +12,7 @@ @annotate("READ_TEXT", color="purple", domain="cudf_python") @ioutils.doc_read_text() def read_text( - filepath_or_buffer, delimiter=None, **kwargs, + filepath_or_buffer, delimiter=None, byte_range=None, **kwargs, ): """{docstring}""" @@ -24,5 +24,7 @@ def read_text( ) return cudf.Series._from_data( - libtext.read_text(filepath_or_buffer, delimiter=delimiter,) + libtext.read_text( + filepath_or_buffer, delimiter=delimiter, byte_range=byte_range + ) ) diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index f3d69e1745e..6176184b670 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -1315,7 +1315,7 @@ def test_csv_reader_aligned_byte_range(tmpdir): [(None, None), ("int", "hex"), ("int32", "hex32"), ("int64", "hex64")], ) def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype): - lines = ["0x0", "-0x1000", "0xfedcba", "0xABCDEF", "0xaBcDeF", "9512c20b"] + lines = ["0x0", "-0x1000", "0xfedcba", "0xABCDEF", "0xaBcDeF"] values = [int(hex_int, 16) for hex_int in lines] buffer = "\n".join(lines) @@ -1334,6 +1334,35 @@ def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype): assert_eq(pdf, gdf) +@pytest.mark.parametrize( + "np_dtype, gdf_dtype", + [("int", "hex"), ("int32", "hex32"), ("int64", "hex64")], +) +def test_csv_reader_hexadecimal_overflow(np_dtype, gdf_dtype): + # This tests values which cause an overflow warning that will become an + # error in pandas. NumPy wraps the overflow silently up to the bounds of a + # signed int64. + lines = [ + "0x0", + "-0x1000", + "0xfedcba", + "0xABCDEF", + "0xaBcDeF", + "0x9512c20b", + "0x7fffffff", + "0x7fffffffffffffff", + "-0x8000000000000000", + ] + values = [int(hex_int, 16) for hex_int in lines] + buffer = "\n".join(lines) + + gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"]) + + expected = np.array(values, dtype=np_dtype) + actual = gdf["hex_int"].to_numpy() + np.testing.assert_array_equal(expected, actual) + + @pytest.mark.parametrize("quoting", [0, 1, 2, 3]) def test_csv_reader_pd_consistent_quotes(quoting): names = ["text"] diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py index 5ff66fc750f..fb6505f5f92 100644 --- a/python/cudf/cudf/tests/test_text.py +++ b/python/cudf/cudf/tests/test_text.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. import numpy as np import pytest @@ -778,3 +778,54 @@ def test_read_text(datadir): actual = cudf.read_text(chess_file, delimiter=delimiter) assert_eq(expected, actual) + + +def test_read_text_byte_range(datadir): + chess_file = str(datadir) + "/chess.pgn" + delimiter = "1." 
+ + with open(chess_file, "r") as f: + data = f.read() + content = data.split(delimiter) + + # Since Python split removes the delimiter and read_text does + # not we need to add it back to the 'content' + expected = cudf.Series( + [ + c + delimiter if i < (len(content) - 1) else c + for i, c in enumerate(content) + ] + ) + + byte_range_size = (len(data) // 3) + (len(data) % 3 != 0) + + actual_0 = cudf.read_text( + chess_file, + delimiter=delimiter, + byte_range=[byte_range_size * 0, byte_range_size], + ) + actual_1 = cudf.read_text( + chess_file, + delimiter=delimiter, + byte_range=[byte_range_size * 1, byte_range_size], + ) + actual_2 = cudf.read_text( + chess_file, + delimiter=delimiter, + byte_range=[byte_range_size * 2, byte_range_size], + ) + + actual = cudf.concat([actual_0, actual_1, actual_2], ignore_index=True) + + assert_eq(expected, actual) + + +def test_read_text_byte_range_large(datadir): + content = str(("\n" if x % 5 == 0 else "x") for x in range(0, 300000000)) + delimiter = "1." + temp_file = str(datadir) + "/temp.txt" + + with open(temp_file, "w") as f: + f.write(content) + + cudf.read_text(temp_file, delimiter=delimiter) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 4dadfede866..315da4d8dd6 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -24,12 +24,6 @@ _EQUALITY_OPS = { - "eq", - "ne", - "lt", - "gt", - "le", - "ge", "__eq__", "__ne__", "__lt__",
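
End-to-end, the io/text changes above plumb libcudf's byte_range_info through the Cython bindings into cudf.read_text, so a large delimited file can be read in non-overlapping byte ranges and the pieces concatenated, exactly as test_read_text_byte_range does with chess.pgn. A rough usage sketch, assuming a cudf build that contains this patch and a hypothetical /tmp/records.txt written on the fly:

import cudf

# Hypothetical sample file; any delimited text file works the same way.
path = "/tmp/records.txt"
data = "one;two;three;four;five;"
with open(path, "w") as f:
    f.write(data)

chunk = -(-len(data) // 3)  # ceiling division, so three ranges cover the file

# byte_range is an [offset, size] pair in bytes. Each row is returned by
# exactly one range, so concatenating the per-range results recovers every
# row, which is the property the new test asserts against chess.pgn.
parts = [
    cudf.read_text(path, delimiter=";", byte_range=[i * chunk, chunk])
    for i in range(3)
]
print(cudf.concat(parts, ignore_index=True))
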