diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
index 6bad2342012..31d080e95d7 100755
--- a/ci/benchmark/build.sh
+++ b/ci/benchmark/build.sh
@@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
 export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
 
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=1
+export INSTALL_DASK_MAIN=0
 
 function remove_libcudf_kernel_cache_dir {
     EXITCODE=$?
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index b1d649db8f9..08f9034357a 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
 export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
 
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=1
+export INSTALL_DASK_MAIN=0
 
 # ucx-py version
 export UCX_PY_VERSION='0.26.*'
diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index e9d018a2d18..2225cbe0918 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -12,7 +12,7 @@ dependencies:
   - clang-tools=11.1.0
   - cupy>=9.5.0,<11.0.0a0
   - rmm=22.06.*
-  - cmake>=3.20.1
+  - cmake>=3.20.1,<3.23
   - cmake_setuptools>=0.1.3
   - python>=3.7,<3.9
   - numba>=0.54
diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp
index c2187f056cf..9e8fd1244d0 100644
--- a/cpp/include/cudf/io/orc.hpp
+++ b/cpp/include/cudf/io/orc.hpp
@@ -67,9 +67,6 @@ class orc_reader_options {
   // Cast timestamp columns to a specific type
   data_type _timestamp_type{type_id::EMPTY};
 
-  // Columns that should be converted from Decimal to Float64
-  std::vector<std::string> _decimal_cols_as_float;
-
   // Columns that should be read as Decimal128
   std::vector<std::string> _decimal128_columns;
 
@@ -138,14 +135,6 @@ class orc_reader_options {
    */
   data_type get_timestamp_type() const { return _timestamp_type; }
 
-  /**
-   * @brief Fully qualified names of columns that should be converted from Decimal to Float64.
-   */
-  std::vector<std::string> const& get_decimal_cols_as_float() const
-  {
-    return _decimal_cols_as_float;
-  }
-
   /**
    * @brief Fully qualified names of columns that should be read as 128-bit Decimal.
    */
@@ -215,18 +204,6 @@ class orc_reader_options {
    */
   void set_timestamp_type(data_type type) { _timestamp_type = type; }
 
-  /**
-   * @brief Set columns that should be converted from Decimal to Float64
-   *
-   * @param val Vector of fully qualified column names.
-   */
-  [[deprecated(
-    "Decimal to float conversion is deprecated and will be remove in future release")]] void
-  set_decimal_cols_as_float(std::vector<std::string> val)
-  {
-    _decimal_cols_as_float = std::move(val);
-  }
-
   /**
    * @brief Set columns that should be read as 128-bit Decimal
    *
@@ -340,21 +317,6 @@ class orc_reader_options_builder {
     return *this;
   }
 
-  /**
-   * @brief Columns that should be converted from decimals to float64.
-   *
-   * @param val Vector of column names.
-   * @return this for chaining.
-   */
-  [[deprecated(
-    "Decimal to float conversion is deprecated and will be remove in future "
-    "release")]] orc_reader_options_builder&
-  decimal_cols_as_float(std::vector<std::string> val)
-  {
-    options._decimal_cols_as_float = std::move(val);
-    return *this;
-  }
-
   /**
    * @brief Columns that should be read as 128-bit Decimal
    *
diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp
index 0e904958d15..0ab3953470d 100644
--- a/cpp/include/cudf/strings/replace_re.hpp
+++ b/cpp/include/cudf/strings/replace_re.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -86,7 +86,8 @@ std::unique_ptr<column> replace_re(
  *
  * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
  *
- * @throw cudf::logic_error if capture index values in `replacement` are not in range 1-99
+ * @throw cudf::logic_error if capture index values in `replacement` are not in range 0-99, or if
+ * an index exceeds the group count specified in the pattern
  *
  * @param strings Strings instance for this operation.
  * @param pattern The regular expression patterns to search within each string.
diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index 5cbc0899196..059df283c94 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -241,30 +241,22 @@ size_t gather_stream_info(const size_t stripe_index,
 /**
  * @brief Determines cuDF type of an ORC Decimal column.
  */
-auto decimal_column_type(std::vector<std::string> const& float64_columns,
-                         std::vector<std::string> const& decimal128_columns,
+auto decimal_column_type(std::vector<std::string> const& decimal128_columns,
                          cudf::io::orc::detail::aggregate_orc_metadata const& metadata,
                          int column_index)
 {
-  if (metadata.get_col_type(column_index).kind != DECIMAL) return type_id::EMPTY;
+  if (metadata.get_col_type(column_index).kind != DECIMAL) { return type_id::EMPTY; }
 
-  auto const& column_path = metadata.column_path(0, column_index);
-  auto is_column_in       = [&](const std::vector<std::string>& cols) {
-    return std::find(cols.cbegin(), cols.cend(), column_path) != cols.end();
-  };
-
-  auto const user_selected_float64    = is_column_in(float64_columns);
-  auto const user_selected_decimal128 = is_column_in(decimal128_columns);
-  CUDF_EXPECTS(not user_selected_float64 or not user_selected_decimal128,
-               "Both decimal128 and float64 types selected for column " + column_path);
-
-  if (user_selected_float64) return type_id::FLOAT64;
-  if (user_selected_decimal128) return type_id::DECIMAL128;
+  // A column explicitly listed by the user is always read as DECIMAL128.
+  auto const& column_path = metadata.column_path(0, column_index);
+  auto const last         = decimal128_columns.cend();
+  if (std::find(decimal128_columns.cbegin(), last, column_path) != last) {
+    return type_id::DECIMAL128; }
 
   auto const precision = metadata.get_col_type(column_index)
                            .precision.value_or(cuda::std::numeric_limits<int64_t>::digits10);
-  if (precision <= cuda::std::numeric_limits<int32_t>::digits10) return type_id::DECIMAL32;
-  if (precision <= cuda::std::numeric_limits<int64_t>::digits10) return type_id::DECIMAL64;
+  if (precision <= cuda::std::numeric_limits<int32_t>::digits10) { return type_id::DECIMAL32; }
+  if (precision <= cuda::std::numeric_limits<int64_t>::digits10) { return type_id::DECIMAL64; }
   return type_id::DECIMAL128;
 }
 
@@ -796,12 +788,11 @@ std::unique_ptr<column> reader::impl::create_empty_column(const size_type orc_co
                                                           rmm::cuda_stream_view stream)
 {
   schema_info.name = _metadata.column_name(0, orc_col_id);
-  auto const type  = to_type_id(
-    _metadata.get_schema(orc_col_id),
-    _use_np_dtypes,
-    _timestamp_type.id(),
-    decimal_column_type(_decimal_cols_as_float, decimal128_columns, _metadata, orc_col_id));
-  int32_t scale = 0;
+  auto const type  = to_type_id(_metadata.get_schema(orc_col_id),
+                               _use_np_dtypes,
+                               _timestamp_type.id(),
+                               decimal_column_type(decimal128_columns, _metadata, orc_col_id));
+  int32_t scale    = 0;
   std::vector<std::unique_ptr<column>> child_columns;
   std::unique_ptr<column> out_col = nullptr;
   auto kind                       = _metadata.get_col_type(orc_col_id).kind;
@@ -943,8 +934,7 @@ reader::impl::impl(std::vector<std::unique_ptr<datasource>>&& sources,
   _use_np_dtypes = options.is_enabled_use_np_dtypes();
 
   // Control decimals conversion
-  _decimal_cols_as_float = options.get_decimal_cols_as_float();
-  decimal128_columns     = options.get_decimal128_columns();
+  decimal128_columns = options.get_decimal128_columns();
 }
 
 timezone_table reader::impl::compute_timezone_table(
@@ -1004,11 +994,10 @@ table_with_metadata reader::impl::read(size_type skip_rows,
     // Get a list of column data types
     std::vector<data_type> column_types;
     for (auto& col : columns_level) {
-      auto col_type = to_type_id(
-        _metadata.get_col_type(col.id),
-        _use_np_dtypes,
-        _timestamp_type.id(),
-        decimal_column_type(_decimal_cols_as_float, decimal128_columns, _metadata, col.id));
+      auto col_type = to_type_id(_metadata.get_col_type(col.id),
+                                 _use_np_dtypes,
+                                 _timestamp_type.id(),
+                                 decimal_column_type(decimal128_columns, _metadata, col.id));
       CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type");
       if (col_type == type_id::DECIMAL32 or col_type == type_id::DECIMAL64 or
           col_type == type_id::DECIMAL128) {
diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index 1e586bcde00..103093f055f 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -221,7 +221,6 @@ class reader::impl {
 
   bool _use_index{true};
   bool _use_np_dtypes{true};
-  std::vector<std::string> _decimal_cols_as_float;
   std::vector<std::string> decimal128_columns;
   data_type _timestamp_type{type_id::EMPTY};
   reader_column_meta _col_meta{};
diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu
index dc09b3e7dd8..b4cbb5d9037 100644
--- a/cpp/src/io/orc/stripe_data.cu
+++ b/cpp/src/io/orc/stripe_data.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -962,15 +962,6 @@ static __device__ uint32_t Byte_RLE(orc_bytestream_s* bs,
   return rle->num_vals;
 }
 
-/**
- * @brief Powers of 10
- */
-static const __device__ __constant__ double kPow10[40] = {
-  1.0,   1.e1,  1.e2,  1.e3,  1.e4,  1.e5,  1.e6,  1.e7,  1.e8,  1.e9,  1.e10, 1.e11, 1.e12, 1.e13,
-  1.e14, 1.e15, 1.e16, 1.e17, 1.e18, 1.e19, 1.e20, 1.e21, 1.e22, 1.e23, 1.e24, 1.e25, 1.e26, 1.e27,
-  1.e28, 1.e29, 1.e30, 1.e31, 1.e32, 1.e33, 1.e34, 1.e35, 1.e36, 1.e37, 1.e38, 1.e39,
-};
-
 static const __device__ __constant__ int64_t kPow5i[28] = {1,
                                                            5,
                                                            25,
@@ -1045,34 +1036,24 @@ static __device__ int Decode_Decimals(orc_bytestream_s* bs,
       auto const pos = static_cast<int>(vals.i64[2 * t]);
       __int128_t v   = decode_varint128(bs, pos);
 
-      if (dtype_id == type_id::FLOAT64) {
-        double f      = v;
-        int32_t scale = (t < numvals) ? val_scale : 0;
-        if (scale >= 0)
-          vals.f64[t] = f / kPow10[min(scale, 39)];
-        else
-          vals.f64[t] = f * kPow10[min(-scale, 39)];
-      } else {
-        auto const scaled_value = [&]() {
-          // Since cuDF column stores just one scale, value needs to be adjusted to col_scale from
-          // val_scale. So the difference of them will be used to add 0s or remove digits.
-          int32_t scale = (t < numvals) ? col_scale - val_scale : 0;
-          if (scale >= 0) {
-            scale = min(scale, 27);
-            return (v * kPow5i[scale]) << scale;
-          } else  // if (scale < 0)
-          {
-            scale = min(-scale, 27);
-            return (v / kPow5i[scale]) >> scale;
-          }
-        }();
-        if (dtype_id == type_id::DECIMAL32) {
-          vals.i32[t] = scaled_value;
-        } else if (dtype_id == type_id::DECIMAL64) {
-          vals.i64[t] = scaled_value;
+      auto const scaled_value = [&]() {
+        // Since cuDF column stores just one scale, value needs to be adjusted to col_scale from
+        // val_scale. So the difference of them will be used to add 0s or remove digits.
+        int32_t const scale = (t < numvals) ? col_scale - val_scale : 0;
+        if (scale >= 0) {
+          auto const abs_scale = min(scale, 27);
+          return (v * kPow5i[abs_scale]) << abs_scale;
         } else {
-          vals.i128[t] = scaled_value;
+          auto const abs_scale = min(-scale, 27);
+          return (v / kPow5i[abs_scale]) >> abs_scale;
         }
+      }();
+      if (dtype_id == type_id::DECIMAL32) {
+        vals.i32[t] = scaled_value;
+      } else if (dtype_id == type_id::DECIMAL64) {
+        vals.i64[t] = scaled_value;
+      } else {
+        vals.i128[t] = scaled_value;
       }
     }
     // There is nothing to read, so break
@@ -1711,8 +1692,7 @@ __global__ void __launch_bounds__(block_size)
             case DECIMAL:
               if (s->chunk.dtype_id == type_id::DECIMAL32) {
                 static_cast<uint32_t*>(data_out)[row] = s->vals.u32[t + vals_skipped];
-              } else if (s->chunk.dtype_id == type_id::FLOAT64 or
-                         s->chunk.dtype_id == type_id::DECIMAL64) {
+              } else if (s->chunk.dtype_id == type_id::DECIMAL64) {
                 static_cast<uint64_t*>(data_out)[row] = s->vals.u64[t + vals_skipped];
               } else {
                 // decimal128
diff --git a/cpp/src/join/mixed_join_kernels.cu b/cpp/src/join/mixed_join_kernels.cu
index 5638f0ddd38..efaea841e45 100644
--- a/cpp/src/join/mixed_join_kernels.cu
+++ b/cpp/src/join/mixed_join_kernels.cu
@@ -35,18 +35,19 @@ namespace detail {
 namespace cg = cooperative_groups;
 
 template <cudf::size_type block_size, bool has_nulls>
-__global__ void mixed_join(table_device_view left_table,
-                           table_device_view right_table,
-                           table_device_view probe,
-                           table_device_view build,
-                           row_equality const equality_probe,
-                           join_kind const join_type,
-                           cudf::detail::mixed_multimap_type::device_view hash_table_view,
-                           size_type* join_output_l,
-                           size_type* join_output_r,
-                           cudf::ast::detail::expression_device_view device_expression_data,
-                           cudf::size_type const* join_result_offsets,
-                           bool const swap_tables)
+__launch_bounds__(block_size) __global__
+  void mixed_join(table_device_view left_table,
+                  table_device_view right_table,
+                  table_device_view probe,
+                  table_device_view build,
+                  row_equality const equality_probe,
+                  join_kind const join_type,
+                  cudf::detail::mixed_multimap_type::device_view hash_table_view,
+                  size_type* join_output_l,
+                  size_type* join_output_r,
+                  cudf::ast::detail::expression_device_view device_expression_data,
+                  cudf::size_type const* join_result_offsets,
+                  bool const swap_tables)
 {
   // Normally the casting of a shared memory array is used to create multiple
   // arrays of different types from the shared memory buffer, but here it is
diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu
index c8cfc9998f0..63a69554245 100644
--- a/cpp/src/join/mixed_join_kernels_semi.cu
+++ b/cpp/src/join/mixed_join_kernels_semi.cu
@@ -32,17 +32,18 @@ namespace detail {
 namespace cg = cooperative_groups;
 
 template <cudf::size_type block_size, bool has_nulls>
-__global__ void mixed_join_semi(table_device_view left_table,
-                                table_device_view right_table,
-                                table_device_view probe,
-                                table_device_view build,
-                                row_equality const equality_probe,
-                                join_kind const join_type,
-                                cudf::detail::semi_map_type::device_view hash_table_view,
-                                size_type* join_output_l,
-                                cudf::ast::detail::expression_device_view device_expression_data,
-                                cudf::size_type const* join_result_offsets,
-                                bool const swap_tables)
+__launch_bounds__(block_size) __global__
+  void mixed_join_semi(table_device_view left_table,
+                       table_device_view right_table,
+                       table_device_view probe,
+                       table_device_view build,
+                       row_equality const equality_probe,
+                       join_kind const join_type,
+                       cudf::detail::semi_map_type::device_view hash_table_view,
+                       size_type* join_output_l,
+                       cudf::ast::detail::expression_device_view device_expression_data,
+                       cudf::size_type const* join_result_offsets,
+                       bool const swap_tables)
 {
   // Normally the casting of a shared memory array is used to create multiple
   // arrays of different types from the shared memory buffer, but here it is
diff --git a/cpp/src/join/mixed_join_size_kernels.cu b/cpp/src/join/mixed_join_size_kernels.cu
index 1a08b8792c2..22c71bfc33a 100644
--- a/cpp/src/join/mixed_join_size_kernels.cu
+++ b/cpp/src/join/mixed_join_size_kernels.cu
@@ -35,7 +35,7 @@ namespace detail {
 namespace cg = cooperative_groups;
 
 template <int block_size, bool has_nulls>
-__global__ void compute_mixed_join_output_size(
+__launch_bounds__(block_size) __global__ void compute_mixed_join_output_size(
   table_device_view left_table,
   table_device_view right_table,
   table_device_view probe,
diff --git a/cpp/src/join/mixed_join_size_kernels_semi.cu b/cpp/src/join/mixed_join_size_kernels_semi.cu
index 2c077a698f8..f6b9fb85bbb 100644
--- a/cpp/src/join/mixed_join_size_kernels_semi.cu
+++ b/cpp/src/join/mixed_join_size_kernels_semi.cu
@@ -32,7 +32,7 @@ namespace detail {
 namespace cg = cooperative_groups;
 
 template <int block_size, bool has_nulls>
-__global__ void compute_mixed_join_output_size_semi(
+__launch_bounds__(block_size) __global__ void compute_mixed_join_output_size_semi(
   table_device_view left_table,
   table_device_view right_table,
   table_device_view probe,
diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu
index 27e0bd4fac9..384813d6e3d 100644
--- a/cpp/src/strings/replace/backref_re.cu
+++ b/cpp/src/strings/replace/backref_re.cu
@@ -68,7 +68,8 @@ std::string get_backref_pattern(std::string const& repl)
  * For example, for input string 'hello \2 and \1' the returned `backref_type` vector
  * contains `[(2,6),(1,11)]` and the returned string is 'hello  and '.
  */
-std::pair<std::string, std::vector<backref_type>> parse_backrefs(std::string const& repl)
+std::pair<std::string, std::vector<backref_type>> parse_backrefs(std::string const& repl,
+                                                                 int const group_count)
 {
   std::vector<backref_type> backrefs;
   std::string str = repl;  // make a modifiable copy
@@ -79,7 +80,8 @@ std::pair<std::string, std::vector<backref_type>> parse_backrefs(std::string con
   while (std::regex_search(str, m, ex) && !m.empty()) {
     // parse the back-ref index number
     size_type const index = static_cast<size_type>(std::atoi(std::string{m[1]}.c_str()));
-    CUDF_EXPECTS(index > 0 && index < 100, "Group index numbers must be in the range 1-99");
+    CUDF_EXPECTS(index >= 0 && index <= group_count,
+                 "Group index numbers must be in the range 0 to group count");
 
     // store the new byte offset and index value
     size_type const position = static_cast<size_type>(m.position(0));
@@ -146,7 +148,8 @@ std::unique_ptr<column> replace_with_backrefs(
     reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream);
 
   // parse the repl string for back-ref indicators
-  auto const parse_result = parse_backrefs(replacement);
+  auto group_count = std::min(99, d_prog->group_counts());  // group count should NOT exceed 99
+  auto const parse_result = parse_backrefs(replacement, group_count);
   rmm::device_uvector<backref_type> backrefs =
     cudf::detail::make_device_uvector_async(parse_result.second, stream);
   string_scalar repl_scalar(parse_result.first, true, stream);
diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp
index bac5bf1f55b..5823a859f7b 100644
--- a/cpp/tests/io/orc_test.cpp
+++ b/cpp/tests/io/orc_test.cpp
@@ -1421,17 +1421,9 @@ TEST_F(OrcReaderTest, DecimalOptions)
 
   cudf_io::orc_reader_options valid_opts =
     cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath})
-      .decimal128_columns({"dec", "fake_name"})
-      .decimal_cols_as_float({"decc", "fake_name"});
-  // Should not throw, even with "fake name" in both options
+      .decimal128_columns({"dec", "fake_name"});
+  // Should not throw, even with "fake name"
   EXPECT_NO_THROW(cudf_io::read_orc(valid_opts));
-
-  cudf_io::orc_reader_options invalid_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath})
-      .decimal128_columns({"dec", "fake_name"})
-      .decimal_cols_as_float({"dec", "fake_name"});
-  // Should throw, options overlap
-  EXPECT_THROW(cudf_io::read_orc(invalid_opts), cudf::logic_error);
 }
 
 TEST_F(OrcWriterTest, DecimalOptionsNested)
diff --git a/cpp/tests/strings/replace_regex_tests.cpp b/cpp/tests/strings/replace_regex_tests.cpp
index 7c87d8bf64a..2b9e8b7aae7 100644
--- a/cpp/tests/strings/replace_regex_tests.cpp
+++ b/cpp/tests/strings/replace_regex_tests.cpp
@@ -281,13 +281,32 @@ TEST_F(StringsReplaceRegexTest, BackrefWithGreedyQuantifier)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 }
 
+TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexZeroIndexTest)
+{
+  // Group 0 expands to the whole match; groups 1 and 2 are the two capture groups.
+  cudf::test::strings_column_wrapper input(
+    {"TEST123", "TEST1TEST2", "TEST2-TEST1122", "TEST1-TEST-T", "TES3"});
+  auto const sv      = cudf::strings_column_view(input);
+  auto const pattern = std::string{"(TEST)(\\d+)"};
+  auto const repl    = std::string{"${0}: ${1}, ${2}; "};
+  auto const results = cudf::strings::replace_with_backrefs(sv, pattern, repl);
+  cudf::test::strings_column_wrapper expected({
+    "TEST123: TEST, 123; ",
+    "TEST1: TEST, 1; TEST2: TEST, 2; ",
+    "TEST2: TEST, 2; -TEST1122: TEST, 1122; ",
+    "TEST1: TEST, 1; -TEST-T",
+    "TES3",
+  });
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+}
+
 TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexErrorTest)
 {
   cudf::test::strings_column_wrapper strings({"this string left intentionally blank"});
   auto view = cudf::strings_column_view(strings);
 
-  EXPECT_THROW(cudf::strings::replace_with_backrefs(view, "(\\w)", "\\0"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::replace_with_backrefs(view, "(\\w)", "\\123"), cudf::logic_error);
+  // back-reference \3 is out of range: the pattern defines only 2 capture groups
+  EXPECT_THROW(cudf::strings::replace_with_backrefs(view, "(\\w).(\\w)", "\\3"), cudf::logic_error);
   EXPECT_THROW(cudf::strings::replace_with_backrefs(view, "", "\\1"), cudf::logic_error);
   EXPECT_THROW(cudf::strings::replace_with_backrefs(view, "(\\w)", ""), cudf::logic_error);
 }
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index d1509f14c6e..58901d5743b 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -4987,6 +4987,22 @@ void testStringReplaceWithBackrefs() {
       assertColumnsAreEqual(expected, actual);
     }
 
+    // test zero as group index
+    try (ColumnVector v = ColumnVector.fromStrings("aa-11 b2b-345", "aa-11a 1c-2b2 b2-c3", "11-aa", null);
+         ColumnVector expected = ColumnVector.fromStrings("aa-11:aa:11; b2b-345:b:345;",
+             "aa-11:aa:11;a 1c-2:c:2;b2 b2-c3", "11-aa", null);
+         ColumnVector actual = v.stringReplaceWithBackrefs(
+             "([a-z]+)-([0-9]+)", "${0}:${1}:${2};")) {
+      assertColumnsAreEqual(expected, actual);
+    }
+
+    // group index exceeds group count
+    assertThrows(CudfException.class, () -> {
+      try (ColumnVector v = ColumnVector.fromStrings("ABC123defgh");
+           ColumnVector r = v.stringReplaceWithBackrefs("([A-Z]+)([0-9]+)([a-z]+)", "\\4")) {
+      }
+    });
+
   }
 
   @Test
diff --git a/python/cudf/cudf/_lib/cpp/io/orc.pxd b/python/cudf/cudf/_lib/cpp/io/orc.pxd
index 0c2f971a26c..62ff5eb4f53 100644
--- a/python/cudf/cudf/_lib/cpp/io/orc.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/orc.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libc.stdint cimport uint8_t
 from libcpp cimport bool
@@ -36,7 +36,6 @@ cdef extern from "cudf/io/orc.hpp" \
         void enable_use_index(bool val) except+
         void enable_use_np_dtypes(bool val) except+
         void set_timestamp_type(data_type type) except+
-        void set_decimal_cols_as_float(vector[string] val) except+
 
         @staticmethod
         orc_reader_options_builder builder(
@@ -55,9 +54,6 @@ cdef extern from "cudf/io/orc.hpp" \
         orc_reader_options_builder& use_index(bool val) except+
         orc_reader_options_builder& use_np_dtypes(bool val) except+
         orc_reader_options_builder& timestamp_type(data_type type) except+
-        orc_reader_options_builder& decimal_cols_as_float(
-            vector[string] val
-        ) except+
 
         orc_reader_options build() except+
 
diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx
index 127e3a612dc..8331f9c3d17 100644
--- a/python/cudf/cudf/_lib/orc.pyx
+++ b/python/cudf/cudf/_lib/orc.pyx
@@ -93,7 +93,6 @@ cpdef read_orc(object filepaths_or_buffers,
                object skip_rows=None,
                object num_rows=None,
                bool use_index=True,
-               object decimal_cols_as_float=None,
                object timestamp_type=None):
     """
     Cython function to call into libcudf API, see `read_orc`.
@@ -120,7 +119,6 @@ cpdef read_orc(object filepaths_or_buffers,
             )
         ),
         use_index,
-        decimal_cols_as_float or [],
     )
 
     cdef table_with_metadata c_result
@@ -319,8 +317,7 @@ cdef orc_reader_options make_orc_reader_options(
     size_type skip_rows,
     size_type num_rows,
     type_id timestamp_type,
-    bool use_index,
-    object decimal_cols_as_float
+    bool use_index
 ) except*:
 
     for i, datasource in enumerate(filepaths_or_buffers):
@@ -333,10 +330,6 @@ cdef orc_reader_options make_orc_reader_options(
         c_column_names.push_back(str(col).encode())
     cdef orc_reader_options opts
     cdef source_info src = make_source_info(filepaths_or_buffers)
-    cdef vector[string] c_decimal_cols_as_float
-    c_decimal_cols_as_float.reserve(len(decimal_cols_as_float))
-    for decimal_col in decimal_cols_as_float:
-        c_decimal_cols_as_float.push_back(str(decimal_col).encode())
     opts = move(
         orc_reader_options.builder(src)
         .columns(c_column_names)
@@ -345,7 +338,6 @@ cdef orc_reader_options make_orc_reader_options(
         .num_rows(num_rows)
         .timestamp_type(data_type(timestamp_type))
         .use_index(use_index)
-        .decimal_cols_as_float(c_decimal_cols_as_float)
         .build()
     )
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 17cac3593a3..08a30729e7c 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1339,8 +1339,14 @@ def _slice(self: T, arg: slice) -> T:
 
     @_cudf_nvtx_annotate
     def memory_usage(self, index=True, deep=False):
-        return Series(
-            {str(k): v for k, v in super().memory_usage(index, deep).items()}
+        # One entry per column, labelled by the stringified column name.
+        sizes = [column.memory_usage for column in self._data.columns]
+        labels = list(map(str, self._data.names))
+        if index:
+            sizes += [self._index.memory_usage()]
+            labels += ["Index"]
+        return Series._from_data(
+            data={None: as_column(sizes)}, index=as_index(labels)
         )
 
     @_cudf_nvtx_annotate
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index a84606b0953..75c6e4d0964 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -339,12 +339,7 @@ def memory_usage(self, deep=False):
         -------
         The total bytes used.
         """
-        if deep:
-            warnings.warn(
-                "The deep parameter is ignored and is only included "
-                "for pandas compatibility."
-            )
-        return {name: col.memory_usage for name, col in self._data.items()}
+        raise NotImplementedError
 
     def __len__(self):
         return self._num_rows
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 7df5be3f692..a31fe4c3b99 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -914,7 +914,7 @@ def _concat(cls, objs):
 
     @_cudf_nvtx_annotate
     def memory_usage(self, deep=False):
-        return sum(super().memory_usage(deep=deep).values())
+        return self._column.memory_usage
 
     @_cudf_nvtx_annotate
     def equals(self, other, **kwargs):
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index c5c2322d95a..458fc16c511 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -704,10 +704,7 @@ def memory_usage(self, index=True, deep=False):
         >>> s.memory_usage(index=False)
         24
         """
-        usage = super().memory_usage(deep=deep)
-        if index:
-            usage["Index"] = self.index.memory_usage()
-        return usage
+        raise NotImplementedError
 
     def hash_values(self, method="murmur3"):
         """Compute the hash of values in this column.
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 39228f034d4..d80fb00942b 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1474,7 +1474,7 @@ def _clean_nulls_from_index(self):
 
     @_cudf_nvtx_annotate
     def memory_usage(self, deep=False):
-        usage = sum(super().memory_usage(deep=deep).values())
+        usage = sum(col.memory_usage for col in self._data.columns)
         if self.levels:
             for level in self.levels:
                 usage += level.memory_usage(deep=deep)
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 0ea02edb924..8748b9775be 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -856,7 +856,9 @@ def to_frame(self, name=None):
 
     @_cudf_nvtx_annotate
     def memory_usage(self, index=True, deep=False):
-        return sum(super().memory_usage(index, deep).values())
+        column_usage = self._column.memory_usage
+        index_usage = self._index.memory_usage() if index else 0
+        return column_usage + index_usage
 
     @_cudf_nvtx_annotate
     def __array_function__(self, func, types, args, kwargs):
diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py
index 0ac0e02e4d1..6a2ffef52db 100644
--- a/python/cudf/cudf/io/orc.py
+++ b/python/cudf/cudf/io/orc.py
@@ -287,18 +287,11 @@ def read_orc(
     skiprows=None,
     num_rows=None,
     use_index=True,
-    decimal_cols_as_float=None,
     timestamp_type=None,
     use_python_file_object=True,
     **kwargs,
 ):
     """{docstring}"""
-    if decimal_cols_as_float is not None:
-        warnings.warn(
-            "`decimal_cols_as_float` is deprecated and will be removed in "
-            "the future",
-            FutureWarning,
-        )
     from cudf import DataFrame
 
     # Multiple sources are passed as a list. If a single source is passed,
@@ -365,7 +358,6 @@ def read_orc(
                 skiprows,
                 num_rows,
                 use_index,
-                decimal_cols_as_float,
                 timestamp_type,
             )
         )
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 62715ad7580..5082fb08b92 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -1266,10 +1266,7 @@ def test_map_type_read(columns, num_rows, use_index):
         assert_eq(expected_tbl.to_pandas(), gdf)
 
 
-@pytest.mark.parametrize(
-    "data", [["_col0"], ["FakeName", "_col0", "TerriblyFakeColumnName"]]
-)
-def test_orc_reader_decimal(datadir, data):
+def test_orc_reader_decimal(datadir):
     path = datadir / "TestOrcFile.decimal.orc"
     try:
         orcfile = pa.orc.ORCFile(path)
@@ -1277,28 +1274,8 @@ def test_orc_reader_decimal(datadir, data):
         pytest.skip(".orc file is not found: %s" % e)
 
     pdf = orcfile.read().to_pandas()
-    gdf = cudf.read_orc(path, decimal_cols_as_float=data).to_pandas()
-
-    # Convert the decimal dtype from PyArrow to float64 for comparison to cuDF
-    # This is because cuDF returns as float64
-    pdf = pdf.apply(pd.to_numeric)
-
-    assert_eq(pdf, gdf)
-
-
-@pytest.mark.parametrize("data", [["InvalidColumnName"]])
-def test_orc_reader_decimal_invalid_column(datadir, data):
-    path = datadir / "TestOrcFile.decimal.orc"
-    try:
-        orcfile = pa.orc.ORCFile(path)
-    except pa.ArrowIOError as e:
-        pytest.skip(".orc file is not found: %s" % e)
-
-    pdf = orcfile.read().to_pandas()
-    gdf = cudf.read_orc(path, decimal_cols_as_float=data).to_pandas()
+    gdf = cudf.read_orc(path).to_pandas()
 
-    # Since the `decimal_cols_as_float` column name
-    # is invalid, this should be a decimal
     assert_eq(pdf, gdf)
 
 
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index cfe1957dfd6..5f348563243 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -392,9 +392,6 @@
     If not None, the total number of rows to read.
 use_index : bool, default True
     If True, use row index if available for faster seeking.
-decimal_cols_as_float: list, default None
-    If specified, names of the columns that should be converted from
-    Decimal to Float64 in the resulting dataframe.
 use_python_file_object : boolean, default True
     If True, Arrow-backed PythonFile objects will be used in place of fsspec
     AbstractBufferedFile objects at IO time. This option is likely to improve
diff --git a/python/cudf/requirements/cuda-11.0/dev_requirements.txt b/python/cudf/requirements/cuda-11.0/dev_requirements.txt
deleted file mode 100644
index d8dce276820..00000000000
--- a/python/cudf/requirements/cuda-11.0/dev_requirements.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
-
-# pyarrow gpu package will have to be built from source :
-# https://arrow.apache.org/docs/python/install.html#installing-from-source
-
-cupy-cuda110
-cachetools
-cmake
-cmake-setuptools>=0.1.3
-cython>=0.29,<0.30
-dlpack
-fastavro>=0.22.9
-python-snappy>=0.6.0
-fsspec>=0.6.0
-hypothesis
-mimesis<4.1
-mypy==0.782
-nbsphinx
-numba>=0.53.1
-numpy
-numpydoc
-nvtx>=0.2.1
-packaging
-pandas>=1.0,<1.4.0dev0
-pandoc==2.0a4
-protobuf
-pydata-sphinx-theme
-pyorc
-pytest
-pytest-benchmark
-pytest-xdist
-rapidjson
-recommonmark
-setuptools
-sphinx
-sphinx-copybutton
-sphinx-markdown-tables
-sphinxcontrib-websupport
-transformers<=4.10.3
-typing_extensions
-wheel
diff --git a/python/cudf/requirements/cuda-11.2/dev_requirements.txt b/python/cudf/requirements/cuda-11.2/dev_requirements.txt
deleted file mode 100644
index c11d108360d..00000000000
--- a/python/cudf/requirements/cuda-11.2/dev_requirements.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
-
-# pyarrow gpu package will have to be built from source :
-# https://arrow.apache.org/docs/python/install.html#installing-from-source
-
-cupy-cuda112
-cachetools
-cmake
-cmake-setuptools>=0.1.3
-cython>=0.29,<0.30
-dlpack
-fastavro>=0.22.9
-python-snappy>=0.6.0
-fsspec>=0.6.0
-hypothesis
-mimesis<4.1
-mypy==0.782
-nbsphinx
-numba>=0.53.1
-numpy
-numpydoc
-nvtx>=0.2.1
-packaging
-pandas>=1.0,<1.4.0dev0
-pandoc==2.0a4
-protobuf
-pydata-sphinx-theme
-pyorc
-pytest
-pytest-benchmark
-pytest-xdist
-rapidjson
-recommonmark
-setuptools
-sphinx
-sphinx-copybutton
-sphinx-markdown-tables
-sphinxcontrib-websupport
-transformers<=4.10.3
-typing_extensions
-wheel
diff --git a/python/cudf_kafka/dev_requirements.txt b/python/cudf_kafka/dev_requirements.txt
deleted file mode 100644
index af52659e08e..00000000000
--- a/python/cudf_kafka/dev_requirements.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
-
-flake8==3.8.3
-black==19.10b0
-isort==5.6.4
-python-confluent-kafka
-pytest
-setuptools
-wheel
-cython>=0.29,<0.30
-python-confluent-kafka
diff --git a/python/custreamz/dev_requirements.txt b/python/custreamz/dev_requirements.txt
deleted file mode 100644
index a6b44c640f6..00000000000
--- a/python/custreamz/dev_requirements.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
-
-flake8==3.8.3
-black==19.10b0
-isort==5.6.4
-dask==2022.03.0
-distributed==2022.03.0
-streamz
-python-confluent-kafka
-pytest
-setuptools
-wheel
diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index d1edfb071a2..36e3416c8a3 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -398,7 +398,10 @@ def group_split_cudf(df, c, k, ignore_index=False):
 @sizeof_dispatch.register(cudf.DataFrame)
 @_dask_cudf_nvtx_annotate
 def sizeof_cudf_dataframe(df):
-    return int(df.memory_usage().sum())
+    return int(
+        sum(col.memory_usage for col in df._data.columns)
+        + df._index.memory_usage()
+    )
 
 
 @sizeof_dispatch.register((cudf.Series, cudf.BaseIndex))
diff --git a/python/dask_cudf/dev_requirements.txt b/python/dask_cudf/dev_requirements.txt
deleted file mode 100644
index 438317adf87..00000000000
--- a/python/dask_cudf/dev_requirements.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
-
-dask==2022.03.0
-distributed==2022.03.0
-fsspec>=0.6.0
-numba>=0.53.1
-numpy
-pandas>=1.0,<1.4.0dev0
-pytest
-setuptools
-wheel
-flake8==3.8.3
-black==19.10b0
-isort==5.6.4