From d1cebdbf50d92358962bc66a1b2c0a8476de1f35 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 7 Feb 2023 11:11:08 -0500 Subject: [PATCH 01/24] Refactor reduction logic for fixed-point types (#12652) Reworks some internal source specific to fixed-point types using `cudf::reduce` by removing the duplicated code logic. This was found while working on #12573 and #10432. Since the fix is requires no dependencies, this separate PR is used to minimize code review churn. This should help with code consistency with the fixed-point-specific logic when added to segmented-reduction. No function has changed so all existing gtests are adequate. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12652 --- cpp/src/reductions/simple.cuh | 31 +++--------------------- cpp/tests/reductions/reduction_tests.cpp | 3 +-- 2 files changed, 4 insertions(+), 30 deletions(-) diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index 76ed864a92d..5fe7b91e28a 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -115,37 +115,12 @@ std::unique_ptr fixed_point_reduction( { using Type = device_storage_type_t; - auto dcol = cudf::column_device_view::create(col, stream); - auto simple_op = Op{}; - - // Cast initial value - std::optional const initial_value = [&] { - if (init.has_value() && init.value().get().is_valid()) { - using ScalarType = cudf::scalar_type_t; - return std::optional( - static_cast(&init.value().get())->value(stream)); - } else { - return std::optional(std::nullopt); - } - }(); - - auto result = [&] { - if (col.has_nulls()) { - auto f = simple_op.template get_null_replacing_element_transformer(); - auto it = thrust::make_transform_iterator(dcol->pair_begin(), f); - return cudf::reduction::detail::reduce(it, col.size(), simple_op, initial_value, stream, mr); - } else { - auto f = simple_op.template get_element_transformer(); - auto it = thrust::make_transform_iterator(dcol->begin(), f); - return cudf::reduction::detail::reduce(it, col.size(), simple_op, initial_value, stream, mr); - } - }(); + auto result = simple_reduction(col, init, stream, mr); auto const scale = [&] { if (std::is_same_v) { auto const valid_count = static_cast(col.size() - col.null_count()); - return numeric::scale_type{col.type().scale() * - (valid_count + (initial_value.has_value() ? 1 : 0))}; + return numeric::scale_type{col.type().scale() * (valid_count + (init.has_value() ? 1 : 0))}; } else if (std::is_same_v) { return numeric::scale_type{col.type().scale() * 2}; } diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 997a5daa189..5a358687893 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -294,7 +294,6 @@ TYPED_TEST(SumReductionTest, Sum) .second); } -using ReductionTypes = cudf::test::Types; TYPED_TEST_SUITE(ReductionTest, cudf::test::NumericTypes); TYPED_TEST(ReductionTest, Product) From 291c7519546acea9335a1e79002a3451e70c6d83 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 7 Feb 2023 15:53:50 -0600 Subject: [PATCH 02/24] Add build dependencies to Java tests. (#12723) The `latest` CI container was recently updated from CUDA 11.5 to CUDA 11.8. Due to some changes in the image, there are no compilers or `make`. To address this, I added the `build` dependency list to the `test_java` file in `dependencies.yaml`. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Vyas Ramasubramani (https://github.com/vyasr) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/12723 --- ci/test_java.sh | 4 +++- dependencies.yaml | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/test_java.sh b/ci/test_java.sh index a0ba7c41607..27a1f2aa46f 100755 --- a/ci/test_java.sh +++ b/ci/test_java.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. set -euo pipefail @@ -13,6 +13,8 @@ rapids-dependency-file-generator \ rapids-mamba-retry env create --force -f env.yaml -n test +export CMAKE_GENERATOR=Ninja + # Temporarily allow unbound variables for conda activation. 
set +u conda activate test diff --git a/dependencies.yaml b/dependencies.yaml index 73206795fdb..0a3a2ce7828 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -28,6 +28,7 @@ files: test_java: output: none includes: + - build - cudatoolkit - test_java test_notebooks: From 37fe468faa48c4000387a3830b0170cb1b7c5f07 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Tue, 7 Feb 2023 18:11:53 -0600 Subject: [PATCH 03/24] Enable doctests for GroupBy methods (#12658) This PR enables doctests for some GroupBy methods that are not currently tested due to not meeting the inclusion criteria in our doctest class. This includes enabling tests for `GroupBy.apply` with `engine='jit'`. came up during https://github.com/rapidsai/cudf/pull/11452 Authors: - https://github.com/brandon-b-miller Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/12658 --- python/cudf/cudf/core/groupby/__init__.py | 7 ++- python/cudf/cudf/core/groupby/groupby.py | 60 ++++++++++++----------- python/cudf/cudf/tests/test_doctests.py | 8 ++- 3 files changed, 43 insertions(+), 32 deletions(-) diff --git a/python/cudf/cudf/core/groupby/__init__.py b/python/cudf/cudf/core/groupby/__init__.py index bb21dd1729d..4375ed3e3da 100644 --- a/python/cudf/cudf/core/groupby/__init__.py +++ b/python/cudf/cudf/core/groupby/__init__.py @@ -1,3 +1,8 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from cudf.core.groupby.groupby import GroupBy, Grouper + +__all__ = [ + "GroupBy", + "Grouper", +] diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 91e00eb43f3..04a71b77413 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -111,7 +111,7 @@ def _quantile_75(x): ... 'Max Speed': [380., 370., 24., 26.], ... 
}}) >>> df - Animal Max Speed + Animal Max Speed 0 Falcon 380.0 1 Falcon 370.0 2 Parrot 24.0 @@ -420,8 +420,11 @@ def agg(self, func): Examples -------- >>> import cudf - >>> a = cudf.DataFrame( - {'a': [1, 1, 2], 'b': [1, 2, 3], 'c': [2, 2, 1]}) + >>> a = cudf.DataFrame({ + ... 'a': [1, 1, 2], + ... 'b': [1, 2, 3], + ... 'c': [2, 2, 1] + ... }) >>> a.groupby('a').agg('sum') b c a @@ -430,6 +433,12 @@ def agg(self, func): Specifying a list of aggregations to perform on each column. + >>> import cudf + >>> a = cudf.DataFrame({ + ... 'a': [1, 1, 2], + ... 'b': [1, 2, 3], + ... 'c': [2, 2, 1] + ... }) >>> a.groupby('a').agg(['sum', 'min']) b c sum min sum min @@ -439,6 +448,12 @@ def agg(self, func): Using a dict to specify aggregations to perform per column. + >>> import cudf + >>> a = cudf.DataFrame({ + ... 'a': [1, 1, 2], + ... 'b': [1, 2, 3], + ... 'c': [2, 2, 1] + ... }) >>> a.groupby('a').agg({'a': 'max', 'b': ['min', 'mean']}) a b max min mean @@ -448,6 +463,12 @@ def agg(self, func): Using lambdas/callables to specify aggregations taking parameters. + >>> import cudf + >>> a = cudf.DataFrame({ + ... 'a': [1, 1, 2], + ... 'b': [1, 2, 3], + ... 'c': [2, 2, 1] + ... }) >>> f1 = lambda x: x.quantile(0.5); f1.__name__ = "q0.5" >>> f2 = lambda x: x.quantile(0.75); f2.__name__ = "q0.75" >>> a.groupby('a').agg([f1, f2]) @@ -905,6 +926,7 @@ def mult(df): .. code-block:: + >>> import pandas as pd >>> df = pd.DataFrame({ ... 'a': [1, 1, 2, 2], ... 'b': [1, 2, 1, 2], @@ -1218,10 +1240,12 @@ def describe(self, include=None, exclude=None): Examples -------- >>> import cudf - >>> gdf = cudf.DataFrame({"Speed": [380.0, 370.0, 24.0, 26.0], - "Score": [50, 30, 90, 80]}) + >>> gdf = cudf.DataFrame({ + ... "Speed": [380.0, 370.0, 24.0, 26.0], + ... "Score": [50, 30, 90, 80], + ... }) >>> gdf - Speed Score + Speed Score 0 380.0 50 1 370.0 30 2 24.0 90 @@ -1290,7 +1314,7 @@ def corr(self, method="pearson", min_periods=1): ... "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], ... 
"val3": [4, 5, 6, 1, 2, 9, 8, 5, 1]}) >>> gdf - id val1 val2 val3 + id val1 val2 val3 0 a 5 4 4 1 a 4 5 5 2 a 6 6 6 @@ -1652,28 +1676,6 @@ def fillna( Returns ------- DataFrame or Series - - .. pandas-compat:: - **groupby.fillna** - - This function may return result in different format to the method - Pandas supports. For example: - - .. code-block:: - - >>> df = pd.DataFrame({'k': [1, 1, 2], 'v': [2, None, 4]}) - >>> gdf = cudf.from_pandas(df) - >>> df.groupby('k').fillna({'v': 4}) # pandas - v - k - 1 0 2.0 - 1 4.0 - 2 2 4.0 - >>> gdf.groupby('k').fillna({'v': 4}) # cudf - v - 0 2.0 - 1 4.0 - 2 4.0 """ if inplace: raise NotImplementedError("Does not support inplace yet.") diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py index dbb5c548166..0da5c6b04d6 100644 --- a/python/cudf/cudf/tests/test_doctests.py +++ b/python/cudf/cudf/tests/test_doctests.py @@ -1,8 +1,9 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. import contextlib import doctest import inspect import io +import itertools import os import numpy as np @@ -12,6 +13,9 @@ pytestmark = pytest.mark.filterwarnings("ignore::FutureWarning") +# modules that will be searched for doctests +tests = [cudf, cudf.core.groupby] + def _name_in_all(parent, name): return name in getattr(parent, "__all__", []) @@ -78,7 +82,7 @@ def chdir_to_tmp_path(cls, tmp_path): @pytest.mark.parametrize( "docstring", - _find_doctests_in_obj(cudf), + itertools.chain(*[_find_doctests_in_obj(mod) for mod in tests]), ids=lambda docstring: docstring.name, ) def test_docstring(self, docstring): From b87b64f1b1bc00b36e83f05187a2eeaaf1229029 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 7 Feb 2023 20:11:07 -0500 Subject: [PATCH 04/24] Convert `rank` to use to experimental row comparators (#12481) Converts the `rank` function to use experimental row comparators, which support list and struct types. Part of #11844. 
[Throughput benchmarks](https://github.com/rapidsai/cudf/pull/12481#issuecomment-1416384229) are available below. It seems like when `size_bytes` is constrained, the generator generates fewer rows in `list` types for increasing depths. That's why, `depth=4` has a higher throughput than `depth=1` because the number of leaf elements generated are the same, but with much fewer rows. Authors: - Divye Gala (https://github.com/divyegala) - Jordan Jacobelli (https://github.com/Ethyling) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/12481 --- cpp/benchmarks/CMakeLists.txt | 5 +- cpp/benchmarks/sort/nested_types_common.hpp | 85 +++ cpp/benchmarks/sort/rank.cpp | 4 +- cpp/benchmarks/sort/rank_lists.cpp | 49 ++ cpp/benchmarks/sort/rank_structs.cpp | 47 ++ cpp/benchmarks/sort/rank_types_common.hpp | 52 ++ cpp/benchmarks/sort/sort_lists.cpp | 16 +- cpp/benchmarks/sort/sort_structs.cpp | 52 +- cpp/src/sort/rank.cu | 39 +- cpp/tests/sort/rank_test.cpp | 556 ++++++++++++++++++-- 10 files changed, 776 insertions(+), 129 deletions(-) create mode 100644 cpp/benchmarks/sort/nested_types_common.hpp create mode 100644 cpp/benchmarks/sort/rank_lists.cpp create mode 100644 cpp/benchmarks/sort/rank_structs.cpp create mode 100644 cpp/benchmarks/sort/rank_types_common.hpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 6f67cb32b0a..c5ae3345da5 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -169,7 +169,10 @@ ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp) # ################################################################################################## # * sort benchmark -------------------------------------------------------------------------------- ConfigureBench(SORT_BENCH sort/rank.cpp sort/sort.cpp 
sort/sort_strings.cpp) -ConfigureNVBench(SORT_NVBENCH sort/segmented_sort.cpp sort/sort_lists.cpp sort/sort_structs.cpp) +ConfigureNVBench( + SORT_NVBENCH sort/rank_lists.cpp sort/rank_structs.cpp sort/segmented_sort.cpp + sort/sort_lists.cpp sort/sort_structs.cpp +) # ################################################################################################## # * quantiles benchmark diff --git a/cpp/benchmarks/sort/nested_types_common.hpp b/cpp/benchmarks/sort/nested_types_common.hpp new file mode 100644 index 00000000000..c4851823534 --- /dev/null +++ b/cpp/benchmarks/sort/nested_types_common.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include + +#include + +#include + +inline std::unique_ptr create_lists_data(nvbench::state& state) +{ + const size_t size_bytes(state.get_int64("size_bytes")); + const cudf::size_type depth{static_cast(state.get_int64("depth"))}; + auto const null_frequency{state.get_float64("null_frequency")}; + + data_profile table_profile; + table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5); + table_profile.set_list_depth(depth); + table_profile.set_null_probability(null_frequency); + return create_random_table({cudf::type_id::LIST}, table_size_bytes{size_bytes}, table_profile); +} + +inline std::unique_ptr create_structs_data(nvbench::state& state, + cudf::size_type const n_cols = 1) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + const cudf::size_type n_rows{static_cast(state.get_int64("NumRows"))}; + const cudf::size_type depth{static_cast(state.get_int64("Depth"))}; + const bool nulls{static_cast(state.get_int64("Nulls"))}; + + // Create columns with values in the range [0,100) + std::vector columns; + columns.reserve(n_cols); + std::generate_n(std::back_inserter(columns), n_cols, [&]() { + auto const elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + if (!nulls) return column_wrapper(elements, elements + n_rows); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; }); + return column_wrapper(elements, elements + n_rows, valids); + }); + + std::vector> cols; + std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { + return col.release(); + }); + + std::vector> child_cols = std::move(cols); + // Nest the child columns in a struct, then nest that struct column inside another + // struct column up 
to the desired depth + for (int i = 0; i < depth; i++) { + std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 100 * (i + 1)); + std::generate_n( + std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); + cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); + child_cols = std::vector>{}; + child_cols.push_back(struct_col.release()); + } + + // Create table view + return std::make_unique(std::move(child_cols)); +} diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp index 2c26f4fa15d..6d0a8e5aedd 100644 --- a/cpp/benchmarks/sort/rank.cpp +++ b/cpp/benchmarks/sort/rank.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ static void BM_rank(benchmark::State& state, bool nulls) // Create columns with values in the range [0,100) data_profile profile = data_profile_builder().cardinality(0).distribution( cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); - profile.set_null_probability(nulls ? std::optional{0.01} : std::nullopt); + profile.set_null_probability(nulls ? std::optional{0.2} : std::nullopt); auto keys = create_random_column(cudf::type_to_id(), row_count{n_rows}, profile); for (auto _ : state) { diff --git a/cpp/benchmarks/sort/rank_lists.cpp b/cpp/benchmarks/sort/rank_lists.cpp new file mode 100644 index 00000000000..f467b639810 --- /dev/null +++ b/cpp/benchmarks/sort/rank_lists.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nested_types_common.hpp" +#include "rank_types_common.hpp" + +#include + +#include + +#include + +template +void nvbench_rank_lists(nvbench::state& state, nvbench::type_list>) +{ + cudf::rmm_pool_raii pool_raii; + + auto const table = create_lists_data(state); + + auto const null_frequency{state.get_float64("null_frequency")}; + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::rank(table->view().column(0), + method, + cudf::order::ASCENDING, + null_frequency ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_rank_lists, NVBENCH_TYPE_AXES(methods)) + .set_name("rank_lists") + .add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28}) + .add_int64_axis("depth", {1, 4}) + .add_float64_axis("null_frequency", {0, 0.2}); diff --git a/cpp/benchmarks/sort/rank_structs.cpp b/cpp/benchmarks/sort/rank_structs.cpp new file mode 100644 index 00000000000..c1e2c5bd7dc --- /dev/null +++ b/cpp/benchmarks/sort/rank_structs.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nested_types_common.hpp" +#include "rank_types_common.hpp" + +#include + +#include + +template +void nvbench_rank_structs(nvbench::state& state, nvbench::type_list>) +{ + cudf::rmm_pool_raii pool_raii; + + auto const table = create_structs_data(state); + + const bool nulls{static_cast(state.get_int64("Nulls"))}; + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::rank(table->view().column(0), + method, + cudf::order::ASCENDING, + nulls ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_rank_structs, NVBENCH_TYPE_AXES(methods)) + .set_name("rank_structs") + .add_int64_power_of_two_axis("NumRows", {10, 18, 26}) + .add_int64_axis("Depth", {0, 1, 8}) + .add_int64_axis("Nulls", {0, 1}); diff --git a/cpp/benchmarks/sort/rank_types_common.hpp b/cpp/benchmarks/sort/rank_types_common.hpp new file mode 100644 index 00000000000..adb58606c42 --- /dev/null +++ b/cpp/benchmarks/sort/rank_types_common.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +enum class rank_method : int32_t {}; + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::rank_method, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "FIRST"; + case cudf::rank_method::AVERAGE: return "AVERAGE"; + case cudf::rank_method::MIN: return "MIN"; + case cudf::rank_method::MAX: return "MAX"; + case cudf::rank_method::DENSE: return "DENSE"; + default: return "unknown"; + } + }, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "cudf::rank_method::FIRST"; + case cudf::rank_method::AVERAGE: return "cudf::rank_method::AVERAGE"; + case cudf::rank_method::MIN: return "cudf::rank_method::MIN"; + case cudf::rank_method::MAX: return "cudf::rank_method::MAX"; + case cudf::rank_method::DENSE: return "cudf::rank_method::DENSE"; + default: return "unknown"; + } + }) + +using methods = nvbench::enum_type_list; diff --git a/cpp/benchmarks/sort/sort_lists.cpp b/cpp/benchmarks/sort/sort_lists.cpp index dac865de479..14cc60cbfe7 100644 --- a/cpp/benchmarks/sort/sort_lists.cpp +++ b/cpp/benchmarks/sort/sort_lists.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,7 @@ * limitations under the License. 
*/ -#include -#include +#include "nested_types_common.hpp" #include @@ -25,16 +24,7 @@ void nvbench_sort_lists(nvbench::state& state) { cudf::rmm_pool_raii pool_raii; - const size_t size_bytes(state.get_int64("size_bytes")); - const cudf::size_type depth{static_cast(state.get_int64("depth"))}; - auto const null_frequency{state.get_float64("null_frequency")}; - - data_profile table_profile; - table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5); - table_profile.set_list_depth(depth); - table_profile.set_null_probability(null_frequency); - auto const table = - create_random_table({cudf::type_id::LIST}, table_size_bytes{size_bytes}, table_profile); + auto const table = create_lists_data(state); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp index 9b6c32940f5..22a6780c237 100644 --- a/cpp/benchmarks/sort/sort_structs.cpp +++ b/cpp/benchmarks/sort/sort_structs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,65 +14,21 @@ * limitations under the License. 
*/ -#include - -#include +#include "nested_types_common.hpp" #include #include -#include - void nvbench_sort_struct(nvbench::state& state) { cudf::rmm_pool_raii pool_raii; - using Type = int; - using column_wrapper = cudf::test::fixed_width_column_wrapper; - std::default_random_engine generator; - std::uniform_int_distribution distribution(0, 100); - - const cudf::size_type n_rows{static_cast(state.get_int64("NumRows"))}; - const cudf::size_type n_cols{1}; - const cudf::size_type depth{static_cast(state.get_int64("Depth"))}; - const bool nulls{static_cast(state.get_int64("Nulls"))}; - - // Create columns with values in the range [0,100) - std::vector columns; - columns.reserve(n_cols); - std::generate_n(std::back_inserter(columns), n_cols, [&]() { - auto const elements = cudf::detail::make_counting_transform_iterator( - 0, [&](auto row) { return distribution(generator); }); - if (!nulls) return column_wrapper(elements, elements + n_rows); - auto valids = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; }); - return column_wrapper(elements, elements + n_rows, valids); - }); - - std::vector> cols; - std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { - return col.release(); - }); - - std::vector> child_cols = std::move(cols); - // Lets add some layers - for (int i = 0; i < depth; i++) { - std::vector struct_validity; - std::uniform_int_distribution bool_distribution(0, 100 * (i + 1)); - std::generate_n( - std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); - cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); - child_cols = std::vector>{}; - child_cols.push_back(struct_col.release()); - } - - // Create table view - auto const input = cudf::table(std::move(child_cols)); + auto const input = create_structs_data(state); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view 
stream_view{launch.get_stream()}; - cudf::detail::sorted_order(input, {}, {}, stream_view, rmm::mr::get_current_device_resource()); + cudf::detail::sorted_order(*input, {}, {}, stream_view, rmm::mr::get_current_device_resource()); }); } diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index 99e99704c10..461e978643f 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -47,37 +47,26 @@ namespace cudf { namespace detail { namespace { -// Functor to identify unique elements in a sorted order table/column -template -struct unique_comparator { - unique_comparator(table_device_view device_table, Iterator const sorted_order, bool has_nulls) - : comparator(nullate::DYNAMIC{has_nulls}, device_table, device_table, null_equality::EQUAL), - permute(sorted_order) - { - } - __device__ ReturnType operator()(size_type index) const noexcept - { - return index == 0 || not comparator(permute[index], permute[index - 1]); - }; - - private: - row_equality_comparator comparator; - Iterator const permute; -}; // Assign rank from 1 to n unique values. Equal values get same rank value. 
rmm::device_uvector sorted_dense_rank(column_view input_col, column_view sorted_order_view, rmm::cuda_stream_view stream) { - auto device_table = table_device_view::create(table_view{{input_col}}, stream); + auto const t_input = table_view{{input_col}}; + auto const comparator = cudf::experimental::row::equality::self_comparator{t_input, stream}; + auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); + + auto const sorted_index_order = thrust::make_permutation_iterator( + sorted_order_view.begin(), thrust::make_counting_iterator(0)); + auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { + return static_cast(index == 0 || + not device_comparator(permute[index], permute[index - 1])); + }; + auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); + auto const input_size = input_col.size(); rmm::device_uvector dense_rank_sorted(input_size, stream); - auto sorted_index_order = thrust::make_permutation_iterator( - sorted_order_view.begin(), thrust::make_counting_iterator(0)); - auto conv = unique_comparator( - *device_table, sorted_index_order, input_col.has_nulls()); - auto unique_it = cudf::detail::make_counting_transform_iterator(0, conv); thrust::inclusive_scan( rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); diff --git a/cpp/tests/sort/rank_test.cpp b/cpp/tests/sort/rank_test.cpp index 8461b0a1984..2722c1dfdad 100644 --- a/cpp/tests/sort/rank_test.cpp +++ b/cpp/tests/sort/rank_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,13 @@ #include #include +template +using lists_col = cudf::test::lists_column_wrapper; +using structs_col = cudf::test::structs_column_wrapper; + +using cudf::test::iterators::null_at; +using cudf::test::iterators::nulls_at; + namespace { void run_rank_test(cudf::table_view input, cudf::table_view expected, @@ -50,10 +58,9 @@ void 
run_rank_test(cudf::table_view input, } using input_arg_t = std::tuple; -input_arg_t asce_keep{cudf::order::ASCENDING, cudf::null_policy::EXCLUDE, cudf::null_order::AFTER}; -input_arg_t asce_top{cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::BEFORE}; -input_arg_t asce_bottom{ - cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::AFTER}; +input_arg_t asc_keep{cudf::order::ASCENDING, cudf::null_policy::EXCLUDE, cudf::null_order::AFTER}; +input_arg_t asc_top{cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::BEFORE}; +input_arg_t asc_bottom{cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::AFTER}; input_arg_t desc_keep{ cudf::order::DESCENDING, cudf::null_policy::EXCLUDE, cudf::null_order::BEFORE}; @@ -105,7 +112,7 @@ TYPED_TEST_SUITE(Rank, cudf::test::NumericTypes); // fixed_width_column_wrapper col1{{ 5, 4, 3, 5, 8, 5}}; // 3, 2, 1, 4, 6, 5 -TYPED_TEST(Rank, first_asce_keep) +TYPED_TEST(Rank, first_asc_keep) { // ASCENDING cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 4, 6, 5}}; @@ -113,25 +120,25 @@ TYPED_TEST(Rank, first_asce_keep) {1, 1, 0, 1, 1, 1}}; // KEEP cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 3, 6, 4}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::FIRST, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::FIRST, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, first_asce_top) +TYPED_TEST(Rank, first_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 4, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{ {3, 2, 1, 4, 6, 5}}; // BEFORE = TOP cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 3, 6, 4}}; - this->run_all_tests(cudf::rank_method::FIRST, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::FIRST, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, first_asce_bottom) +TYPED_TEST(Rank, 
first_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 4, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{ {2, 1, 6, 3, 5, 4}}; // AFTER = BOTTOM cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 3, 6, 4}}; - this->run_all_tests(cudf::rank_method::FIRST, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::FIRST, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, first_desc_keep) @@ -163,30 +170,30 @@ TYPED_TEST(Rank, first_desc_bottom) this->run_all_tests(cudf::rank_method::FIRST, desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, dense_asce_keep) +TYPED_TEST(Rank, dense_asc_keep) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 4, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, -1, 2, 3, 2}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 3, 1, 2, 4, 2}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::DENSE, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, dense_asce_top) +TYPED_TEST(Rank, dense_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 4, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 2, 1, 3, 4, 3}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 3, 1, 2, 4, 2}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, dense_asce_bottom) +TYPED_TEST(Rank, dense_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 4, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, 4, 2, 3, 2}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 3, 1, 2, 4, 2}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_bottom, col1_rank, col2_rank, col3_rank); + 
this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, dense_desc_keep) @@ -215,30 +222,30 @@ TYPED_TEST(Rank, dense_desc_bottom) this->run_all_tests(cudf::rank_method::DENSE, desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, min_asce_keep) +TYPED_TEST(Rank, min_asc_keep) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, -1, 2, 5, 2}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 2, 6, 2}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::MIN, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MIN, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, min_asce_top) +TYPED_TEST(Rank, min_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 2, 6, 2}}; - this->run_all_tests(cudf::rank_method::MIN, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MIN, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, min_asce_bottom) +TYPED_TEST(Rank, min_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, 6, 2, 5, 2}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 2, 6, 2}}; - this->run_all_tests(cudf::rank_method::MIN, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MIN, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, min_desc_keep) @@ -267,30 +274,30 @@ TYPED_TEST(Rank, min_desc_bottom) this->run_all_tests(cudf::rank_method::MIN, desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, max_asce_keep) +TYPED_TEST(Rank, max_asc_keep) { cudf::test::fixed_width_column_wrapper 
col1_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{{4, 1, -1, 4, 5, 4}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{4, 5, 1, 4, 6, 4}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::MAX, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MAX, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, max_asce_top) +TYPED_TEST(Rank, max_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col3_rank{{4, 5, 1, 4, 6, 4}}; - this->run_all_tests(cudf::rank_method::MAX, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MAX, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, max_asce_bottom) +TYPED_TEST(Rank, max_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{{4, 1, 6, 4, 5, 4}}; cudf::test::fixed_width_column_wrapper col3_rank{{4, 5, 1, 4, 6, 4}}; - this->run_all_tests(cudf::rank_method::MAX, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MAX, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, max_desc_keep) @@ -319,28 +326,28 @@ TYPED_TEST(Rank, max_desc_bottom) this->run_all_tests(cudf::rank_method::MAX, desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, average_asce_keep) +TYPED_TEST(Rank, average_asc_keep) { cudf::test::fixed_width_column_wrapper col1_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 1, -1, 3, 5, 3}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{3, 5, 1, 3, 6, 3}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::AVERAGE, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::AVERAGE, asc_keep, col1_rank, 
col2_rank, col3_rank); } -TYPED_TEST(Rank, average_asce_top) +TYPED_TEST(Rank, average_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col2_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col3_rank{{3, 5, 1, 3, 6, 3}}; - this->run_all_tests(cudf::rank_method::AVERAGE, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::AVERAGE, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, average_asce_bottom) +TYPED_TEST(Rank, average_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 1, 6, 3, 5, 3}}; cudf::test::fixed_width_column_wrapper col3_rank{{3, 5, 1, 3, 6, 3}}; - this->run_all_tests(cudf::rank_method::AVERAGE, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::AVERAGE, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, average_desc_keep) @@ -368,30 +375,30 @@ TYPED_TEST(Rank, average_desc_bottom) } // percentage==true (dense, not-dense) -TYPED_TEST(Rank, dense_asce_keep_pct) +TYPED_TEST(Rank, dense_asc_keep_pct) { cudf::test::fixed_width_column_wrapper col1_rank{{0.75, 0.5, 0.25, 0.75, 1., 0.75}}; cudf::test::fixed_width_column_wrapper col2_rank{ {2.0 / 3.0, 1.0 / 3.0, -1., 2.0 / 3.0, 1., 2.0 / 3.0}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{0.5, 0.75, 0.25, 0.5, 1., 0.5}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_keep, col1_rank, col2_rank, col3_rank, true); + this->run_all_tests(cudf::rank_method::DENSE, asc_keep, col1_rank, col2_rank, col3_rank, true); } -TYPED_TEST(Rank, dense_asce_top_pct) +TYPED_TEST(Rank, dense_asc_top_pct) { cudf::test::fixed_width_column_wrapper col1_rank{{0.75, 0.5, 0.25, 0.75, 1., 0.75}}; cudf::test::fixed_width_column_wrapper col2_rank{{0.75, 0.5, 0.25, 0.75, 1., 0.75}}; cudf::test::fixed_width_column_wrapper 
col3_rank{{0.5, 0.75, 0.25, 0.5, 1., 0.5}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_top, col1_rank, col2_rank, col3_rank, true); + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col1_rank, col2_rank, col3_rank, true); } -TYPED_TEST(Rank, dense_asce_bottom_pct) +TYPED_TEST(Rank, dense_asc_bottom_pct) { cudf::test::fixed_width_column_wrapper col1_rank{{0.75, 0.5, 0.25, 0.75, 1., 0.75}}; cudf::test::fixed_width_column_wrapper col2_rank{{0.5, 0.25, 1., 0.5, 0.75, 0.5}}; cudf::test::fixed_width_column_wrapper col3_rank{{0.5, 0.75, 0.25, 0.5, 1., 0.5}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_bottom, col1_rank, col2_rank, col3_rank, true); + this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col1_rank, col2_rank, col3_rank, true); } TYPED_TEST(Rank, min_desc_keep_pct) @@ -444,3 +451,472 @@ TEST_F(RankLarge, average_large) cudf::test::fixed_width_column_wrapper expected(iter + 1, iter + 10559); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); } + +template +struct RankListAndStruct : public cudf::test::BaseFixture { + void run_all_tests(cudf::rank_method method, + input_arg_t input_arg, + cudf::column_view const list_rank, + cudf::column_view const struct_rank, + bool percentage = false) + { + if constexpr (std::is_same_v) { return; } + /* + [ + [], + [1], + [2, 2], + [2, 3], + [2, 2], + [1], + [], + NULL + [2], + NULL, + [1] + ] + */ + auto list_col = + lists_col{{{}, {1}, {2, 2}, {2, 3}, {2, 2}, {1}, {}, {} /*NULL*/, {2}, {} /*NULL*/, {1}}, + nulls_at({7, 9})}; + + // clang-format off + /* + +------------+ + | s| + +------------+ + 0 | {0, null}| + 1 | {1, null}| + 2 | null| + 3 |{null, null}| + 4 | null| + 5 |{null, null}| + 6 | {null, 1}| + 7 | {null, 0}| + +------------+ + */ + std::vector struct_valids{1, 1, 0, 1, 0, 1, 1, 1}; + auto col1 = cudf::test::fixed_width_column_wrapper{{ 0, 1, 9, -1, 9, -1, -1, -1}, {1, 1, 1, 0, 1, 0, 0, 0}}; + auto col2 = cudf::test::fixed_width_column_wrapper{{-1, -1, 9, -1, 9, -1, 1, 0}, 
{0, 0, 1, 0, 1, 0, 1, 1}}; + auto struct_col = cudf::test::structs_column_wrapper{{col1, col2}, struct_valids}.release(); + // clang-format on + + for (auto const& test_case : { + // Non-null column + test_case_t{cudf::table_view{{list_col}}, cudf::table_view{{list_rank}}}, + // Null column + test_case_t{cudf::table_view{{struct_col->view()}}, cudf::table_view{{struct_rank}}}, + }) { + auto [input, output] = test_case; + + run_rank_test(input, + output, + method, + std::get<0>(input_arg), + std::get<1>(input_arg), + std::get<2>(input_arg), + percentage); + } + } +}; + +TYPED_TEST_SUITE(RankListAndStruct, cudf::test::NumericTypes); + +TYPED_TEST(RankListAndStruct, first_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper list_rank{ + {1, 3, 7, 9, 8, 4, 2, -1, 6, -1, 5}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 5, -1, 6, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::FIRST, asc_keep, list_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 3, 5, 9, 11, 10, 6, 4, 1, 8, 2, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{7, 8, 1, 3, 2, 4, 6, 5}; + this->run_all_tests(cudf::rank_method::FIRST, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 1, 3, 7, 9, 8, 4, 2, 10, 6, 11, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 7, 5, 8, 6, 4, 3}; + this->run_all_tests(cudf::rank_method::FIRST, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {8, 5, 2, 1, 3, 6, 9, -1, 4, -1, 7}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 5, -1, 6, 3, 4}, + 
nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::FIRST, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 10, 7, 4, 3, 5, 8, 11, 1, 6, 2, 9}; + cudf::test::fixed_width_column_wrapper struct_rank{8, 7, 1, 3, 2, 4, 5, 6}; + this->run_all_tests(cudf::rank_method::FIRST, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 8, 5, 2, 1, 3, 6, 9, 10, 4, 11, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 7, 5, 8, 6, 3, 4}; + this->run_all_tests(cudf::rank_method::FIRST, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {1, 2, 4, 5, 4, 2, 1, -1, 3, -1, 2}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 5, -1, 5, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::DENSE, asc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{2, 3, 5, 6, 5, 3, 2, 1, 4, 1, 3}; + cudf::test::fixed_width_column_wrapper struct_rank{5, 6, 1, 2, 1, 2, 4, 3}; + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{1, 2, 4, 5, 4, 2, 1, 6, 3, 6, 2}; + cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 6, 5, 6, 5, 4, 3}; + this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ 
+ {5, 4, 2, 1, 2, 4, 5, -1, 3, -1, 4}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 5, -1, 5, 3, 4}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::DENSE, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{6, 5, 3, 2, 3, 5, 6, 1, 4, 1, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{6, 5, 1, 2, 1, 2, 3, 4}; + this->run_all_tests(cudf::rank_method::DENSE, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{5, 4, 2, 1, 2, 4, 5, 6, 3, 6, 4}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 6, 5, 6, 5, 3, 4}; + this->run_all_tests(cudf::rank_method::DENSE, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {1, 3, 7, 9, 7, 3, 1, -1, 6, -1, 3}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 5, -1, 5, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MIN, asc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 3, 5, 9, 11, 9, 5, 3, 1, 8, 1, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{7, 8, 1, 3, 1, 3, 6, 5}; + this->run_all_tests(cudf::rank_method::MIN, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 1, 3, 7, 9, 7, 3, 1, 10, 6, 10, 3}; + cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 7, 5, 7, 5, 4, 3}; + this->run_all_tests(cudf::rank_method::MIN, asc_bottom, col_rank, struct_rank); +} + 
+TYPED_TEST(RankListAndStruct, min_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {8, 5, 2, 1, 2, 5, 8, -1, 4, -1, 5}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 5, -1, 5, 3, 4}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MIN, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 10, 7, 4, 3, 4, 7, 10, 1, 6, 1, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{8, 7, 1, 3, 1, 3, 5, 6}; + this->run_all_tests(cudf::rank_method::MIN, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 8, 5, 2, 1, 2, 5, 8, 10, 4, 10, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 7, 5, 7, 5, 3, 4}; + this->run_all_tests(cudf::rank_method::MIN, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {2, 5, 8, 9, 8, 5, 2, -1, 6, -1, 5}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 6, -1, 6, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MAX, asc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 4, 7, 10, 11, 10, 7, 4, 2, 8, 2, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{7, 8, 2, 4, 2, 4, 6, 5}; + this->run_all_tests(cudf::rank_method::MAX, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 2, 5, 8, 9, 8, 5, 2, 11, 6, 11, 5}; + 
cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 8, 6, 8, 6, 4, 3}; + this->run_all_tests(cudf::rank_method::MAX, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {9, 7, 3, 1, 3, 7, 9, -1, 4, -1, 7}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 6, -1, 6, 3, 4}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MAX, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 11, 9, 5, 3, 5, 9, 11, 2, 6, 2, 9}; + cudf::test::fixed_width_column_wrapper struct_rank{8, 7, 2, 4, 2, 4, 5, 6}; + this->run_all_tests(cudf::rank_method::MAX, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 9, 7, 3, 1, 3, 7, 9, 11, 4, 11, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 8, 6, 8, 6, 3, 4}; + this->run_all_tests(cudf::rank_method::MAX, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {1.5, 4.0, 7.5, 9.0, 7.5, 4.0, 1.5, -1.0, 6.0, -1.0, 4.0}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{ + {1.0, 2.0, -1.0, 5.5, -1.0, 5.5, 4.0, 3.0}, nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::AVERAGE, asc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 3.5, 6.0, 9.5, 11.0, 9.5, 6.0, 3.5, 1.5, 8.0, 1.5, 6.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 7.0, 8.0, 1.5, 3.5, 1.5, 3.5, 6.0, 5.0}; + 
this->run_all_tests(cudf::rank_method::AVERAGE, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 1.5, 4.0, 7.5, 9.0, 7.5, 4.0, 1.5, 10.5, 6.0, 10.5, 4.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 1.0, 2.0, 7.5, 5.5, 7.5, 5.5, 4.0, 3.0}; + this->run_all_tests(cudf::rank_method::AVERAGE, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {8.5, 6.0, 2.5, 1.0, 2.5, 6.0, 8.5, -1.0, 4.0, -1.0, 6.0}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{ + {2.0, 1.0, -1.0, 5.5, -1.0, 5.5, 3.0, 4.0}, nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::AVERAGE, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 10.5, 8.0, 4.5, 3.0, 4.5, 8.0, 10.5, 1.5, 6.0, 1.5, 8.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 8.0, 7.0, 1.5, 3.5, 1.5, 3.5, 5.0, 6.0}; + this->run_all_tests(cudf::rank_method::AVERAGE, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 8.5, 6.0, 2.5, 1.0, 2.5, 6.0, 8.5, 10.5, 4.0, 10.5, 6.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 2.0, 1.0, 7.5, 5.5, 7.5, 5.5, 3.0, 4.0}; + this->run_all_tests(cudf::rank_method::AVERAGE, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_keep_pct) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{{1.0 / 5.0, + 2.0 / 5.0, + 4.0 / 5.0, + 1.0, + 4.0 / 5.0, + 2.0 / 5.0, + 1.0 / 5.0, + -1.0, + 3.0 / 5.0, + -1.0, + 2.0 / 5.0}, + nulls_at({7, 9})}; + + 
cudf::test::fixed_width_column_wrapper struct_rank{ + {1.0 / 5.0, 2.0 / 5.0, -1.0, 1.0, -1.0, 1.0, 4.0 / 5.0, 3.0 / 5.0}, nulls_at({2, 4})}; + + this->run_all_tests(cudf::rank_method::DENSE, asc_keep, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, dense_asc_top_pct) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{1.0 / 3.0, + 1.0 / 2.0, + 5.0 / 6.0, + 1.0, + 5.0 / 6.0, + 1.0 / 2.0, + 1.0 / 3.0, + 1.0 / 6.0, + 2.0 / 3.0, + 1.0 / 6.0, + 1.0 / 2.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 5.0 / 6.0, 1.0, 1.0 / 6.0, 2.0 / 6.0, 1.0 / 6.0, 2.0 / 6.0, 4.0 / 6.0, 3.0 / 6.0}; + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, dense_asc_bottom_pct) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{1.0 / 6.0, + 1.0 / 3.0, + 2.0 / 3.0, + 5.0 / 6.0, + 2.0 / 3.0, + 1.0 / 3.0, + 1.0 / 6.0, + 1.0, + 1.0 / 2.0, + 1.0, + 1.0 / 3.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 1.0 / 6.0, 2.0 / 6.0, 1.0, 5.0 / 6.0, 1.0, 5.0 / 6.0, 4.0 / 6.0, 3.0 / 6.0}; + this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, min_desc_keep_pct) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{{8.0 / 9.0, + 5.0 / 9.0, + 2.0 / 9.0, + 1.0 / 9.0, + 2.0 / 9.0, + 5.0 / 9.0, + 8.0 / 9.0, + -1.0, + 4.0 / 9.0, + -1.0, + 5.0 / 9.0}, + nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{ + {2.0 / 6.0, 1.0 / 6.0, -1.0, 5.0 / 6.0, -1.0, 5.0 / 6.0, 3.0 / 6.0, 4.0 / 6.0}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MIN, desc_keep, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, min_desc_top_pct) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{10.0 / 11.0, + 7.0 / 11.0, + 4.0 / 11.0, + 3.0 / 11.0, + 4.0 / 11.0, 
+ 7.0 / 11.0, + 10.0 / 11.0, + 1.0 / 11.0, + 6.0 / 11.0, + 1.0 / 11.0, + 7.0 / 11.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 1.0, 7.0 / 8.0, 1.0 / 8.0, 3.0 / 8.0, 1.0 / 8.0, 3.0 / 8.0, 5.0 / 8.0, 6.0 / 8.0}; + this->run_all_tests(cudf::rank_method::MIN, desc_top, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, min_desc_bottom_pct) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{8.0 / 11.0, + 5.0 / 11.0, + 2.0 / 11.0, + 1.0 / 11.0, + 2.0 / 11.0, + 5.0 / 11.0, + 8.0 / 11.0, + 10.0 / 11.0, + 4.0 / 11.0, + 10.0 / 11.0, + 5.0 / 11.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 2.0 / 8.0, 1.0 / 8.0, 7.0 / 8.0, 5.0 / 8.0, 7.0 / 8.0, 5.0 / 8.0, 3.0 / 8.0, 4.0 / 8.0}; + this->run_all_tests(cudf::rank_method::MIN, desc_bottom, col_rank, struct_rank, true); +} From fea6288f5f114790fd7d075a6b6c26b2f78a8316 Mon Sep 17 00:00:00 2001 From: Cindy Jiang <47068112+cindyyuanjiang@users.noreply.github.com> Date: Tue, 7 Feb 2023 17:15:28 -0800 Subject: [PATCH 05/24] Add `regex_program` strings splitting java APIs and tests (#12713) This PR adds [split_re, rsplit_re, split_record_re, rsplit_record_re](https://docs.rapids.ai/api/libcudf/nightly/split__re_8hpp.html) related `regex_program` java APIs and unit tests. Part of work for https://github.com/NVIDIA/spark-rapids/issues/7295. 
Authors: - Cindy Jiang (https://github.com/cindyyuanjiang) Approvers: - Jason Lowe (https://github.com/jlowe) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12713 --- .../main/java/ai/rapids/cudf/ColumnView.java | 152 +++++++++++++++--- java/src/main/native/src/ColumnViewJni.cpp | 98 +++++++---- .../java/ai/rapids/cudf/ColumnVectorTest.java | 60 ++++--- 3 files changed, 234 insertions(+), 76 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 4daa3c17cfc..2d0bf28225f 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2531,12 +2531,34 @@ public final ColumnVector stringLocate(Scalar substring, int start, int end) { * regular expression pattern or just by a string literal delimiter. * @return list of strings columns as a table. */ + @Deprecated public final Table stringSplit(String pattern, int limit, boolean splitByRegex) { + if (splitByRegex) { + return stringSplit(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), limit); + } else { + return stringSplit(pattern, limit); + } + } + + /** + * Returns a list of columns by splitting each string using the specified regex program pattern. + * The number of rows in the output columns will be the same as the input column. Null entries + * are added for the rows where split results have been exhausted. Null input entries result in + * all nulls in the corresponding rows of the output columns. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. 
+ * @return list of strings columns as a table. + */ + public final Table stringSplit(RegexProgram regexProg, int limit) { assert type.equals(DType.STRING) : "column type must be a String"; - assert pattern != null : "pattern is null"; - assert pattern.length() > 0 : "empty pattern is not supported"; + assert regexProg != null : "regex program is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; - return new Table(stringSplit(this.getNativeView(), pattern, limit, splitByRegex)); + return new Table(stringSplitRe(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, limit)); } /** @@ -2550,6 +2572,7 @@ public final Table stringSplit(String pattern, int limit, boolean splitByRegex) * regular expression pattern or just by a string literal delimiter. * @return list of strings columns as a table. */ + @Deprecated public final Table stringSplit(String pattern, boolean splitByRegex) { return stringSplit(pattern, -1, splitByRegex); } @@ -2567,7 +2590,10 @@ public final Table stringSplit(String pattern, boolean splitByRegex) { * @return list of strings columns as a table. */ public final Table stringSplit(String delimiter, int limit) { - return stringSplit(delimiter, limit, false); + assert type.equals(DType.STRING) : "column type must be a String"; + assert delimiter != null : "delimiter is null"; + assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; + return new Table(stringSplit(this.getNativeView(), delimiter, limit)); } /** @@ -2580,7 +2606,21 @@ public final Table stringSplit(String delimiter, int limit) { * @return list of strings columns as a table. */ public final Table stringSplit(String delimiter) { - return stringSplit(delimiter, -1, false); + return stringSplit(delimiter, -1); + } + + /** + * Returns a list of columns by splitting each string using the specified regex program pattern. 
+ * The number of rows in the output columns will be the same as the input column. Null entries + * are added for the rows where split results have been exhausted. Null input entries result in + * all nulls in the corresponding rows of the output columns. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @return list of strings columns as a table. + */ + public final Table stringSplit(RegexProgram regexProg) { + return stringSplit(regexProg, -1); } /** @@ -2595,13 +2635,33 @@ public final Table stringSplit(String delimiter) { * regular expression pattern or just by a string literal delimiter. * @return a LIST column of string elements. */ + @Deprecated public final ColumnVector stringSplitRecord(String pattern, int limit, boolean splitByRegex) { + if (splitByRegex) { + return stringSplitRecord(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), limit); + } else { + return stringSplitRecord(pattern, limit); + } + } + + /** + * Returns a column that are lists of strings in which each list is made by splitting the + * corresponding input string using the specified regex program pattern. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + * @return a LIST column of string elements. 
+ */ + public final ColumnVector stringSplitRecord(RegexProgram regexProg, int limit) { assert type.equals(DType.STRING) : "column type must be String"; - assert pattern != null : "pattern is null"; - assert pattern.length() > 0 : "empty pattern is not supported"; + assert regexProg != null : "regex program is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; return new ColumnVector( - stringSplitRecord(this.getNativeView(), pattern, limit, splitByRegex)); + stringSplitRecordRe(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, limit)); } /** @@ -2613,6 +2673,7 @@ public final ColumnVector stringSplitRecord(String pattern, int limit, boolean s * regular expression pattern or just by a string literal delimiter. * @return a LIST column of string elements. */ + @Deprecated public final ColumnVector stringSplitRecord(String pattern, boolean splitByRegex) { return stringSplitRecord(pattern, -1, splitByRegex); } @@ -2628,7 +2689,10 @@ public final ColumnVector stringSplitRecord(String pattern, boolean splitByRegex * @return a LIST column of string elements. */ public final ColumnVector stringSplitRecord(String delimiter, int limit) { - return stringSplitRecord(delimiter, limit, false); + assert type.equals(DType.STRING) : "column type must be String"; + assert delimiter != null : "delimiter is null"; + assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; + return new ColumnVector(stringSplitRecord(this.getNativeView(), delimiter, limit)); } /** @@ -2639,7 +2703,19 @@ public final ColumnVector stringSplitRecord(String delimiter, int limit) { * @return a LIST column of string elements. 
*/ public final ColumnVector stringSplitRecord(String delimiter) { - return stringSplitRecord(delimiter, -1, false); + return stringSplitRecord(delimiter, -1); + } + + /** + * Returns a column that are lists of strings in which each list is made by splitting the + * corresponding input string using the specified regex program pattern. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @return a LIST column of string elements. + */ + public final ColumnVector stringSplitRecord(RegexProgram regexProg) { + return stringSplitRecord(regexProg, -1); } /** @@ -3958,36 +4034,64 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle private static native long substringLocate(long columnView, long substringScalar, int start, int end); /** - * Returns a list of columns by splitting each string using the specified pattern. The number of - * rows in the output columns will be the same as the input column. Null entries are added for a - * row where split results have been exhausted. Null input entries result in all nulls in the - * corresponding rows of the output columns. + * Returns a list of columns by splitting each string using the specified string literal + * delimiter. The number of rows in the output columns will be the same as the input column. + * Null entries are added for the rows where split results have been exhausted. Null input entries + * result in all nulls in the corresponding rows of the output columns. * * @param nativeHandle native handle of the input strings column that being operated on. - * @param pattern UTF-8 encoded string identifying the split pattern for each input string. + * @param delimiter UTF-8 encoded string identifying the split delimiter for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. 
Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + */ + private static native long[] stringSplit(long nativeHandle, String delimiter, int limit); + + /** + * Returns a list of columns by splitting each string using the specified regular expression + * pattern. The number of rows in the output columns will be the same as the input column. + * Null entries are added for the rows where split results have been exhausted. Null input entries + * result in all nulls in the corresponding rows of the output columns. + * + * @param nativeHandle native handle of the input strings column that being operated on. + * @param pattern UTF-8 encoded string identifying the split regular expression pattern for + * each input string. + * @param flags regex flags setting. + * @param capture capture groups setting. * @param limit the maximum size of the list resulting from splitting each input string, * or -1 for all possible splits. Note that limit = 0 (all possible splits without * trailing empty strings) and limit = 1 (no split at all) are not supported. - * @param splitByRegex a boolean flag indicating whether the input strings will be split by a - * regular expression pattern or just by a string literal delimiter. */ - private static native long[] stringSplit(long nativeHandle, String pattern, int limit, - boolean splitByRegex); + private static native long[] stringSplitRe(long nativeHandle, String pattern, int flags, + int capture, int limit); /** * Returns a column that are lists of strings in which each list is made by splitting the * corresponding input string using the specified string literal delimiter. * * @param nativeHandle native handle of the input strings column that being operated on. - * @param pattern UTF-8 encoded string identifying the split pattern for each input string. + * @param delimiter UTF-8 encoded string identifying the split delimiter for each input string. 
+ * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + */ + private static native long stringSplitRecord(long nativeHandle, String delimiter, int limit); + + /** + * Returns a column that are lists of strings in which each list is made by splitting the + * corresponding input string using the specified regular expression pattern. + * + * @param nativeHandle native handle of the input strings column that being operated on. + * @param pattern UTF-8 encoded string identifying the split regular expression pattern for + * each input string. + * @param flags regex flags setting. + * @param capture capture groups setting. * @param limit the maximum size of the list resulting from splitting each input string, * or -1 for all possible splits. Note that limit = 0 (all possible splits without * trailing empty strings) and limit = 1 (no split at all) are not supported. - * @param splitByRegex a boolean flag indicating whether the input strings will be split by a - * regular expression pattern or just by a string literal delimiter. */ - private static native long stringSplitRecord(long nativeHandle, String pattern, int limit, - boolean splitByRegex); + private static native long stringSplitRecordRe(long nativeHandle, String pattern, int flags, + int capture, int limit); /** * Native method to calculate substring from a given string column. 0 indexing. 
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index bfa3fa0a522..958efd364ed 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -681,9 +681,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_reverseStringsOrLists(JNI JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv *env, jclass, jlong input_handle, - jstring pattern_obj, - jint limit, - jboolean split_by_regex) { + jstring delimiter_obj, + jint limit) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -697,21 +696,42 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv * try { cudf::jni::auto_set_device(env); - auto const input = reinterpret_cast(input_handle); - auto const strs_input = cudf::strings_column_view{*input}; + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj); + auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes()); + auto const max_split = limit > 1 ? limit - 1 : limit; + auto result = cudf::strings::split(strings_column, cudf::string_scalar{delimiter}, max_split); + return cudf::jni::convert_table_for_return(env, std::move(result)); + } + CATCH_STD(env, 0); +} - auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); - if (pattern_jstr.is_empty()) { - // Java's split API produces different behaviors than cudf when splitting with empty - // pattern. 
- JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Empty pattern is not supported", 0); - } +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRe( + JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags, + jint capture_groups, jint limit) { + JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); + if (limit == 0 || limit == 1) { + // Cannot achieve the results of splitting with limit == 0 or limit == 1. + // This is because cudf operates on a different parameter (`max_split`) which is converted from + // limit. When limit == 0 or limit == 1, max_split will be non-positive and will result in an + // unlimited split. + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", + "limit == 0 and limit == 1 are not supported", 0); + } + + try { + cudf::jni::auto_set_device(env); + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; - auto result = split_by_regex ? 
- cudf::strings::split_re(strs_input, pattern, max_split) : - cudf::strings::split(strs_input, cudf::string_scalar{pattern}, max_split); + auto const flags = static_cast(regex_flags); + auto const groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); + auto result = cudf::strings::split_re(strings_column, *regex_prog, max_split); return cudf::jni::convert_table_for_return(env, std::move(result)); } CATCH_STD(env, 0); @@ -719,9 +739,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv * JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv *env, jclass, jlong input_handle, - jstring pattern_obj, - jint limit, - jboolean split_by_regex) { + jstring delimiter_obj, + jint limit) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -735,22 +754,43 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv try { cudf::jni::auto_set_device(env); - auto const input = reinterpret_cast(input_handle); - auto const strs_input = cudf::strings_column_view{*input}; + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj); + auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes()); + auto const max_split = limit > 1 ? limit - 1 : limit; + auto result = + cudf::strings::split_record(strings_column, cudf::string_scalar{delimiter}, max_split); + return release_as_jlong(result); + } + CATCH_STD(env, 0); +} - auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); - if (pattern_jstr.is_empty()) { - // Java's split API produces different behaviors than cudf when splitting with empty - // pattern. 
- JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Empty pattern is not supported", 0); - } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecordRe( + JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags, + jint capture_groups, jint limit) { + JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); + if (limit == 0 || limit == 1) { + // Cannot achieve the results of splitting with limit == 0 or limit == 1. + // This is because cudf operates on a different parameter (`max_split`) which is converted from + // limit. When limit == 0 or limit == 1, max_split will be non-positive and will result in an + // unlimited split. + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", + "limit == 0 and limit == 1 are not supported", 0); + } + + try { + cudf::jni::auto_set_device(env); + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; - auto result = - split_by_regex ? 
- cudf::strings::split_record_re(strs_input, pattern, max_split) : - cudf::strings::split_record(strs_input, cudf::string_scalar{pattern}, max_split); + auto const flags = static_cast(regex_flags); + auto const groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); + auto result = cudf::strings::split_record_re(strings_column, *regex_prog, max_split); return release_as_jlong(result); } CATCH_STD(env, 0); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 46264b7d668..ab4baf74277 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4990,28 +4990,29 @@ void testReverseList() { void testStringSplit() { String pattern = " "; try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", - "ARé some things", "test strings here"); + "ARé some things", "test strings here"); Table expectedSplitLimit2 = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there all", null, null, null, "some things", "strings here") - .build(); + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there all", null, null, null, "some things", "strings here") + .build(); Table expectedSplitAll = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there", null, null, null, "some", "strings") - .column("all", null, null, null, "things", "here") - .build(); + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there", null, null, null, "some", "strings") + .column("all", null, null, null, "things", "here") + .build(); Table resultSplitLimit2 = v.stringSplit(pattern, 2); Table resultSplitAll = v.stringSplit(pattern)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); + 
assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); } } @Test void testStringSplitByRegularExpression() { String pattern = "[_ ]"; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", - "ARé some_things", "test_strings_here"); + "ARé some_things", "test_strings_here"); Table expectedSplitLimit2 = new Table.TestBuilder() .column("Héllo", "thésé", null, "", "ARé", "test") .column("there all", null, null, null, "some_things", "strings_here") @@ -5020,11 +5021,17 @@ void testStringSplitByRegularExpression() { .column("Héllo", "thésé", null, "", "ARé", "test") .column("there", null, null, null, "some", "strings") .column("all", null, null, null, "things", "here") - .build(); - Table resultSplitLimit2 = v.stringSplit(pattern, 2, true); - Table resultSplitAll = v.stringSplit(pattern, true)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); + .build()) { + try (Table resultSplitLimit2 = v.stringSplit(pattern, 2, true); + Table resultSplitAll = v.stringSplit(pattern, true)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } + try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2); + Table resultSplitAll = v.stringSplit(regexProg)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } } } @@ -5032,7 +5039,7 @@ void testStringSplitByRegularExpression() { void testStringSplitRecord() { String pattern = " "; try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", - "ARé some things", "test strings here"); + "ARé some things", "test strings here"); ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( new HostColumnVector.ListType(true, new 
HostColumnVector.BasicType(true, DType.STRING)), @@ -5061,8 +5068,9 @@ void testStringSplitRecord() { @Test void testStringSplitRecordByRegularExpression() { String pattern = "[_ ]"; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", - "ARé some_things", "test_strings_here"); + "ARé some_things", "test_strings_here"); ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( new HostColumnVector.ListType(true, new HostColumnVector.BasicType(true, DType.STRING)), @@ -5080,11 +5088,17 @@ void testStringSplitRecordByRegularExpression() { null, Arrays.asList(""), Arrays.asList("ARé", "some", "things"), - Arrays.asList("test", "strings", "here")); - ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true); - ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) { - assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + Arrays.asList("test", "strings", "here"))) { + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true); + ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(regexProg, 2); + ColumnVector resultSplitAll = v.stringSplitRecord(regexProg)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } } } From b8ae0e4b41c541c5b2b27417af30fa1b9afcbdce Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Tue, 7 Feb 2023 23:33:56 -0600 Subject: [PATCH 06/24] Support conversion to/from cudf in dask.dataframe.core.to_backend (#12380) This PR corresponds to the `cudf` component of https://github.com/dask/dask/pull/9758 Authors: - Richard (Rick) Zamora 
(https://github.com/rjzamora) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/12380 --- python/dask_cudf/dask_cudf/backends.py | 194 +++++++++++------- python/dask_cudf/dask_cudf/tests/test_core.py | 55 ++++- 2 files changed, 170 insertions(+), 79 deletions(-) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index b6be5ade6ba..821ec103204 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -11,6 +11,10 @@ import dask.dataframe as dd from dask import config +from dask.dataframe.backends import ( + DataFrameBackendEntrypoint, + PandasBackendEntrypoint, +) from dask.dataframe.core import get_parallel_type, meta_nonempty from dask.dataframe.dispatch import ( categorical_dtype_dispatch, @@ -30,7 +34,7 @@ make_meta_obj, ) from dask.sizeof import sizeof as sizeof_dispatch -from dask.utils import is_arraylike +from dask.utils import Dispatch, is_arraylike import cudf from cudf.api.types import is_string_dtype @@ -446,91 +450,127 @@ def _default_backend(func, *args, **kwargs): return func(*args, **kwargs) -try: +def _unsupported_kwargs(old, new, kwargs): + # Utility to raise a meaningful error when + # unsupported kwargs are encountered within + # ``to_backend_dispatch`` + if kwargs: + raise ValueError( + f"Unsupported key-word arguments used in `to_backend` " + f"for {old}-to-{new} conversion: {kwargs}" + ) - # Define "cudf" backend engine to be registered with Dask - from dask.dataframe.backends import DataFrameBackendEntrypoint - - class CudfBackendEntrypoint(DataFrameBackendEntrypoint): - """Backend-entrypoint class for Dask-DataFrame - - This class is registered under the name "cudf" for the - ``dask.dataframe.backends`` entrypoint in ``setup.cfg``. 
- Dask-DataFrame will use the methods defined in this class - in place of ``dask.dataframe.`` when the - "dataframe.backend" configuration is set to "cudf": - - Examples - -------- - >>> import dask - >>> import dask.dataframe as dd - >>> with dask.config.set({"dataframe.backend": "cudf"}): - ... ddf = dd.from_dict({"a": range(10)}) - >>> type(ddf) - - """ - - @staticmethod - def from_dict( - data, - npartitions, - orient="columns", - dtype=None, - columns=None, - constructor=cudf.DataFrame, - ): - - return _default_backend( - dd.from_dict, - data, - npartitions=npartitions, - orient=orient, - dtype=dtype, - columns=columns, - constructor=constructor, - ) - @staticmethod - def read_parquet(*args, engine=None, **kwargs): - from dask_cudf.io.parquet import CudfEngine +# Register cudf->pandas +to_pandas_dispatch = PandasBackendEntrypoint.to_backend_dispatch() - return _default_backend( - dd.read_parquet, - *args, - engine=CudfEngine, - **kwargs, - ) - @staticmethod - def read_json(*args, **kwargs): - from dask_cudf.io.json import read_json +@to_pandas_dispatch.register((cudf.DataFrame, cudf.Series, cudf.Index)) +def to_pandas_dispatch_from_cudf(data, nullable=False, **kwargs): + _unsupported_kwargs("cudf", "pandas", kwargs) + return data.to_pandas(nullable=nullable) - return read_json(*args, **kwargs) - @staticmethod - def read_orc(*args, **kwargs): - from dask_cudf.io import read_orc +# Register pandas->cudf +to_cudf_dispatch = Dispatch("to_cudf_dispatch") - return read_orc(*args, **kwargs) - @staticmethod - def read_csv(*args, **kwargs): - from dask_cudf.io import read_csv +@to_cudf_dispatch.register((pd.DataFrame, pd.Series, pd.Index)) +def to_cudf_dispatch_from_pandas(data, nan_as_null=None, **kwargs): + _unsupported_kwargs("pandas", "cudf", kwargs) + return cudf.from_pandas(data, nan_as_null=nan_as_null) - return read_csv(*args, **kwargs) - @staticmethod - def read_hdf(*args, **kwargs): - from dask_cudf import from_dask_dataframe +# Define "cudf" backend engine 
to be registered with Dask +class CudfBackendEntrypoint(DataFrameBackendEntrypoint): + """Backend-entrypoint class for Dask-DataFrame - # HDF5 reader not yet implemented in cudf - warnings.warn( - "read_hdf is not yet implemented in cudf/dask_cudf. " - "Moving to cudf from pandas. Expect poor performance!" - ) - return from_dask_dataframe( - _default_backend(dd.read_hdf, *args, **kwargs) - ) + This class is registered under the name "cudf" for the + ``dask.dataframe.backends`` entrypoint in ``setup.cfg``. + Dask-DataFrame will use the methods defined in this class + in place of ``dask.dataframe.`` when the + "dataframe.backend" configuration is set to "cudf": -except ImportError: - pass + Examples + -------- + >>> import dask + >>> import dask.dataframe as dd + >>> with dask.config.set({"dataframe.backend": "cudf"}): + ... ddf = dd.from_dict({"a": range(10)}) + >>> type(ddf) + + """ + + @classmethod + def to_backend_dispatch(cls): + return to_cudf_dispatch + + @classmethod + def to_backend(cls, data: dd.core._Frame, **kwargs): + if isinstance(data._meta, (cudf.DataFrame, cudf.Series, cudf.Index)): + # Already a cudf-backed collection + _unsupported_kwargs("cudf", "cudf", kwargs) + return data + return data.map_partitions(cls.to_backend_dispatch(), **kwargs) + + @staticmethod + def from_dict( + data, + npartitions, + orient="columns", + dtype=None, + columns=None, + constructor=cudf.DataFrame, + ): + + return _default_backend( + dd.from_dict, + data, + npartitions=npartitions, + orient=orient, + dtype=dtype, + columns=columns, + constructor=constructor, + ) + + @staticmethod + def read_parquet(*args, engine=None, **kwargs): + from dask_cudf.io.parquet import CudfEngine + + return _default_backend( + dd.read_parquet, + *args, + engine=CudfEngine, + **kwargs, + ) + + @staticmethod + def read_json(*args, **kwargs): + from dask_cudf.io.json import read_json + + return read_json(*args, **kwargs) + + @staticmethod + def read_orc(*args, **kwargs): + from dask_cudf.io 
import read_orc + + return read_orc(*args, **kwargs) + + @staticmethod + def read_csv(*args, **kwargs): + from dask_cudf.io import read_csv + + return read_csv(*args, **kwargs) + + @staticmethod + def read_hdf(*args, **kwargs): + from dask_cudf import from_dask_dataframe + + # HDF5 reader not yet implemented in cudf + warnings.warn( + "read_hdf is not yet implemented in cudf/dask_cudf. " + "Moving to cudf from pandas. Expect poor performance!" + ) + return from_dask_dataframe( + _default_backend(dd.read_hdf, *args, **kwargs) + ) diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index ee8229bc7e8..7f8876c8564 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd import pytest +from packaging import version import dask from dask import dataframe as dd @@ -31,6 +32,58 @@ def test_from_dict_backend_dispatch(): dd.assert_eq(expect, ddf) +def test_to_backend(): + np.random.seed(0) + data = { + "x": np.random.randint(0, 5, size=10000), + "y": np.random.normal(size=10000), + } + with dask.config.set({"dataframe.backend": "pandas"}): + ddf = dd.from_dict(data, npartitions=2) + assert isinstance(ddf._meta, pd.DataFrame) + + gdf = ddf.to_backend("cudf") + assert isinstance(gdf, dgd.DataFrame) + dd.assert_eq(cudf.DataFrame(data), ddf) + + assert isinstance(gdf.to_backend()._meta, pd.DataFrame) + + +def test_to_backend_kwargs(): + data = {"x": [0, 2, np.nan, 3, 4, 5]} + with dask.config.set({"dataframe.backend": "pandas"}): + dser = dd.from_dict(data, npartitions=2)["x"] + assert isinstance(dser._meta, pd.Series) + + # Using `nan_as_null=False` will result in a cudf-backed + # Series with a NaN element (ranther than ) + gser_nan = dser.to_backend("cudf", nan_as_null=False) + assert isinstance(gser_nan, dgd.Series) + assert np.isnan(gser_nan.compute()).sum() == 1 + + # Using `nan_as_null=True` will result in a 
cudf-backed + # Series with a element (ranther than NaN) + gser_null = dser.to_backend("cudf", nan_as_null=True) + assert isinstance(gser_null, dgd.Series) + assert np.isnan(gser_null.compute()).sum() == 0 + + # Check `nullable` argument for `cudf.Series.to_pandas` + dser_null = gser_null.to_backend("pandas", nullable=False) + assert dser_null.compute().dtype == "float" + dser_null = gser_null.to_backend("pandas", nullable=True) + assert isinstance(dser_null.compute().dtype, pd.Float64Dtype) + + # Check unsupported arguments + with pytest.raises(ValueError, match="pandas-to-cudf"): + dser.to_backend("cudf", bad_arg=True) + + with pytest.raises(ValueError, match="cudf-to-cudf"): + gser_null.to_backend("cudf", bad_arg=True) + + with pytest.raises(ValueError, match="cudf-to-pandas"): + gser_null.to_backend("pandas", bad_arg=True) + + def test_from_cudf(): np.random.seed(0) @@ -547,8 +600,6 @@ def test_unary_ops(func, gdf, gddf): # Fixed in https://github.com/dask/dask/pull/4657 if isinstance(p, cudf.Index): - from packaging import version - if version.parse(dask.__version__) < version.parse("1.1.6"): pytest.skip( "dask.dataframe assert_eq index check hardcoded to " From 8ad4166c7026482a53a60f47b56dd5e1dec1a463 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 8 Feb 2023 10:39:42 -0500 Subject: [PATCH 07/24] Remove cudf::strings::repeat_strings_output_sizes and optional parameter from cudf::strings::repeat_strings (#12609) Removes `cudf::strings::repeat_strings_output_sizes` and the optional sizes parameter from `cudf::strings::repeat_strings`. This function (and corresponding optional parameter) is no longer needed now that the internal utilities will throw an error if the column output size exceeds the maximum. 
Closes #12542 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Karthikeyan (https://github.com/karthikeyann) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12609 --- cpp/benchmarks/string/repeat_strings.cpp | 56 +----- cpp/include/cudf/strings/repeat_strings.hpp | 94 +++------- cpp/src/strings/repeat_strings.cu | 194 +++----------------- cpp/tests/strings/repeat_strings_tests.cpp | 121 +----------- 4 files changed, 56 insertions(+), 409 deletions(-) diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp index 1844e93bc53..fe015b27f13 100644 --- a/cpp/benchmarks/string/repeat_strings.cpp +++ b/cpp/benchmarks/string/repeat_strings.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -79,42 +79,6 @@ static void BM_repeat_strings_column_times(benchmark::State& state) (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t))); } -static void BM_compute_output_strings_sizes(benchmark::State& state) -{ - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); - auto const table = create_data_table(2, n_rows, max_str_length); - auto const strings_col = cudf::strings_column_view(table->view().column(0)); - auto const repeat_times_col = table->view().column(1); - - for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::strings::repeat_strings_output_sizes(strings_col, repeat_times_col); - } - - state.SetBytesProcessed(state.iterations() * - (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t))); -} - -static void BM_repeat_strings_column_times_precomputed_sizes(benchmark::State& state) -{ - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); - auto const table = create_data_table(2, n_rows, max_str_length); - auto const strings_col = cudf::strings_column_view(table->view().column(0)); - auto const repeat_times_col = table->view().column(1); - [[maybe_unused]] auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strings_col, repeat_times_col); - - for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::strings::repeat_strings(strings_col, repeat_times_col, *sizes); - } - - state.SetBytesProcessed(state.iterations() * - (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t))); -} - static void generate_bench_args(benchmark::internal::Benchmark* b) { int const min_rows = 1 << 8; @@ -145,23 +109,5 @@ class RepeatStrings : public cudf::benchmark { ->UseManualTime() \ ->Unit(benchmark::kMillisecond); -#define 
COMPUTE_OUTPUT_STRINGS_SIZES_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(RepeatStrings, name) \ - (::benchmark::State & st) { BM_compute_output_strings_sizes(st); } \ - BENCHMARK_REGISTER_F(RepeatStrings, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -#define REPEAT_STRINGS_COLUMN_TIMES_PRECOMPUTED_SIZES_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(RepeatStrings, name) \ - (::benchmark::State & st) { BM_repeat_strings_column_times_precomputed_sizes(st); } \ - BENCHMARK_REGISTER_F(RepeatStrings, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(scalar_times) REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(column_times) -COMPUTE_OUTPUT_STRINGS_SIZES_BENCHMARK_DEFINE(compute_output_strings_sizes) -REPEAT_STRINGS_COLUMN_TIMES_PRECOMPUTED_SIZES_BENCHMARK_DEFINE(precomputed_sizes) diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp index 0e6ee2126d3..26fe5f95983 100644 --- a/cpp/include/cudf/strings/repeat_strings.hpp +++ b/cpp/include/cudf/strings/repeat_strings.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,15 +32,15 @@ namespace strings { */ /** - * @brief Repeat the given string scalar by a given number of times. + * @brief Repeat the given string scalar a given number of times * * An output string scalar is generated by repeating the input string by a number of times given by - * the @p `repeat_times` parameter. + * the `repeat_times` parameter. * * In special cases: - * - If @p `repeat_times` is not a positive value, an empty (valid) string scalar will be returned. 
+ * - If `repeat_times` is not a positive value, an empty (valid) string scalar will be returned. * - An invalid input scalar will always result in an invalid output scalar regardless of the - * value of @p `repeat_times` parameter. + * value of `repeat_times` parameter. * * @code{.pseudo} * Example: @@ -50,13 +50,13 @@ namespace strings { * @endcode * * @throw cudf::logic_error if the size of the output string scalar exceeds the maximum value that - * can be stored by the index type - * (i.e., @code input.size() * repeat_times > numeric_limits::max() @endcode). + * can be stored by the index type: + * `input.size() * repeat_times > max of size_type` * - * @param input The scalar containing the string to repeat. - * @param repeat_times The number of times the input string is repeated. - * @param mr Device memory resource used to allocate the returned string scalar. - * @return New string scalar in which the input string is repeated. + * @param input The scalar containing the string to repeat + * @param repeat_times The number of times the input string is repeated + * @param mr Device memory resource used to allocate the returned string scalar + * @return New string scalar in which the input string is repeated */ std::unique_ptr repeat_string( string_scalar const& input, @@ -64,19 +64,16 @@ std::unique_ptr repeat_string( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Repeat each string in the given strings column by a given number of times. + * @brief Repeat each string in the given strings column a given number of times * - * An output strings column is generated by repeating each string from the input strings column by a - * number of times given by the @p `repeat_times` parameter. + * An output strings column is generated by repeating each string from the input strings column by + * the number of times given by the `repeat_times` parameter. 
* * In special cases: - * - If @p `repeat_times` is not a positive number, a non-null input string will always result in + * - If `repeat_times` is not a positive number, a non-null input string will always result in * an empty output string. * - A null input string will always result in a null output string regardless of the value of the - * @p `repeat_times` parameter. - * - * The caller is responsible for checking the output column size will not exceed the maximum size of - * a strings column (number of total characters is less than the max size_type value). + * `repeat_times` parameter. * * @code{.pseudo} * Example: @@ -85,10 +82,10 @@ std::unique_ptr repeat_string( * out is ['aaaaaa', null, '', 'bbcbbcbbc'] * @endcode * - * @param input The column containing strings to repeat. - * @param repeat_times The number of times each input string is repeated. - * @param mr Device memory resource used to allocate the returned strings column. - * @return New column containing the repeated strings. + * @param input The column containing strings to repeat + * @param repeat_times The number of times each input string is repeated + * @param mr Device memory resource used to allocate the returned strings column + * @return New column containing the repeated strings */ std::unique_ptr repeat_strings( strings_column_view const& input, @@ -97,11 +94,10 @@ std::unique_ptr repeat_strings( /** * @brief Repeat each string in the given strings column by the numbers of times given in another - * numeric column. + * numeric column * * An output strings column is generated by repeating each of the input string by a number of times - * given by the corresponding row in a @p `repeat_times` numeric column. The computational time can - * be reduced if sizes of the output strings are known and provided. + * given by the corresponding row in a `repeat_times` numeric column. 
* * In special cases: * - Any null row (from either the input strings column or the `repeat_times` column) will always @@ -109,9 +105,6 @@ std::unique_ptr repeat_strings( * - If any value in the `repeat_times` column is not a positive number and its corresponding input * string is not null, the output string will be an empty string. * - * The caller is responsible for checking the output column size will not exceed the maximum size of - * a strings column (number of total characters is less than the max size_type value). - * * @code{.pseudo} * Example: * strs = ['aa', null, '', 'bbc-'] @@ -120,51 +113,16 @@ std::unique_ptr repeat_strings( * out is ['aa', null, '', 'bbc-bbc-bbc-bbc-'] * @endcode * - * @throw cudf::logic_error if the input `repeat_times` column has data type other than integer. + * @throw cudf::logic_error if the input `repeat_times` is not an integer type * @throw cudf::logic_error if the input columns have different sizes. * - * @param input The column containing strings to repeat. + * @param input The column containing strings to repeat * @param repeat_times The column containing numbers of times that the corresponding input strings - * are repeated. - * @param output_strings_sizes The optional column containing pre-computed sizes of the output - * strings. - * @param mr Device memory resource used to allocate the returned strings column. + * are repeated + * @param mr Device memory resource used to allocate the returned strings column * @return New column containing the repeated strings. */ std::unique_ptr repeat_strings( - strings_column_view const& input, - column_view const& repeat_times, - std::optional output_strings_sizes = std::nullopt, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Compute sizes of the output strings if each string in the input strings column - * is repeated by the numbers of times given in another numeric column. 
- * - * The output column storing string output sizes is not nullable. These string sizes are - * also summed up and returned (in an `int64_t` value), which can be used to detect if the input - * strings column can be safely repeated without data corruption due to overflow in string indexing. - * - * @code{.pseudo} - * Example: - * strs = ['aa', null, '', 'bbc-'] - * repeat_times = [ 1, 2, 3, 4 ] - * [output_sizes, total_size] = repeat_strings_output_sizes(strs, repeat_times) - * out is [2, 0, 0, 16], and total_size = 18 - * @endcode - * - * @throw cudf::logic_error if the input `repeat_times` column has data type other than integer. - * @throw cudf::logic_error if the input columns have different sizes. - * - * @param input The column containing strings to repeat. - * @param repeat_times The column containing numbers of times that the corresponding input strings - * are repeated. - * @param mr Device memory resource used to allocate the returned strings column. - * @return A pair with the first item is an int32_t column containing sizes of the output strings, - * and the second item is an int64_t number containing the total sizes (in bytes) of the - * output strings column. - */ -std::pair, int64_t> repeat_strings_output_sizes( strings_column_view const& input, column_view const& repeat_times, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu index cc283fbcee2..3784b535a5b 100644 --- a/cpp/src/strings/repeat_strings.cu +++ b/cpp/src/strings/repeat_strings.cu @@ -176,7 +176,7 @@ namespace { * separate number of times. */ template -struct compute_size_and_repeat_separately_fn { +struct compute_sizes_and_repeat_fn { column_device_view const strings_dv; column_device_view const repeat_times_dv; Iterator const repeat_times_iter; @@ -189,146 +189,63 @@ struct compute_size_and_repeat_separately_fn { // If d_chars != nullptr: only repeat strings. 
char* d_chars{nullptr}; - __device__ int64_t operator()(size_type const idx) const noexcept + __device__ void operator()(size_type const idx) const noexcept { auto const string_is_valid = !strings_has_nulls || strings_dv.is_valid_nocheck(idx); auto const rtimes_is_valid = !rtimes_has_nulls || repeat_times_dv.is_valid_nocheck(idx); // Any null input (either string or repeat_times value) will result in a null output. auto const is_valid = string_is_valid && rtimes_is_valid; + if (!is_valid) { + if (!d_chars) { d_offsets[idx] = 0; } + return; + } - // When the input string is null, `repeat_times` and `string_size` are also set to 0. - // This makes sure that if `repeat_times > 0` then we will always have a valid input string, - // and if `repeat_times <= 0` we will never copy anything to the output. - auto const repeat_times = is_valid ? repeat_times_iter[idx] : size_type{0}; - auto const string_size = - is_valid ? strings_dv.element(idx).size_bytes() : size_type{0}; - - // The output_size is returned, and it needs to be an int64_t number to prevent overflow. - auto const output_size = - repeat_times > 0 ? static_cast(repeat_times) * static_cast(string_size) - : int64_t{0}; + auto repeat_times = repeat_times_iter[idx]; + auto const d_str = strings_dv.element(idx); if (!d_chars) { - // If overflow happen, the stored value of output string size will be incorrect due to - // downcasting. In such cases, the entire output string size array should be discarded. - d_offsets[idx] = static_cast(output_size); - } else if (repeat_times > 0 && string_size > 0) { - auto const d_str = strings_dv.element(idx); - auto const input_ptr = d_str.data(); - auto output_ptr = d_chars + d_offsets[idx]; - for (size_type repeat_idx = 0; repeat_idx < repeat_times; ++repeat_idx) { - output_ptr = copy_and_increment(output_ptr, input_ptr, string_size); + // repeat_times could be negative + d_offsets[idx] = (repeat_times > 0) ? 
(repeat_times * d_str.size_bytes()) : 0; + } else { + auto output_ptr = d_chars + d_offsets[idx]; + while (repeat_times-- > 0) { + output_ptr = copy_and_increment(output_ptr, d_str.data(), d_str.size_bytes()); } } - - // The output_size value may be used to sum up to detect overflow at the caller site. - // The caller can detect overflow easily by checking `SUM(output_size) > INT_MAX`. - return output_size; } }; -/** - * @brief Creates child offsets and chars columns by applying the template function that - * can be used for computing the output size of each string as well as create the output. - * - * This function is similar to `strings::detail::make_strings_children`, except that it accepts an - * optional input `std::optional` that can contain the precomputed sizes of the output - * strings. - * - * @deprecated This will be removed with issue 12542 - */ -template -auto make_strings_children(Func fn, - size_type exec_size, - size_type strings_count, - std::optional output_strings_sizes, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto offsets_column = make_numeric_column( - data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); - - auto offsets_view = offsets_column->mutable_view(); - auto d_offsets = offsets_view.template data(); - fn.d_offsets = d_offsets; - - // This may be called twice -- once for offsets and once for chars. - auto for_each_fn = [exec_size, stream](Func& fn) { - thrust::for_each_n( - rmm::exec_policy(stream), thrust::make_counting_iterator(0), exec_size, fn); - }; - - if (!output_strings_sizes.has_value()) { - // Compute the output sizes only if they are not given. - for_each_fn(fn); - - // Compute the offsets values. 
- auto const bytes = - cudf::detail::sizes_to_offsets(d_offsets, d_offsets + strings_count + 1, d_offsets, stream); - CUDF_EXPECTS(bytes <= static_cast(std::numeric_limits::max()), - "Size of output exceeds column size limit"); - } else { - // Compute the offsets values from the provided output string sizes. - auto const string_sizes = output_strings_sizes.value(); - CUDF_CUDA_TRY(cudaMemsetAsync(d_offsets, 0, sizeof(offset_type), stream.value())); - thrust::inclusive_scan(rmm::exec_policy(stream), - string_sizes.template begin(), - string_sizes.template end(), - d_offsets + 1); - } - - // Now build the chars column - auto const bytes = cudf::detail::get_value(offsets_view, strings_count, stream); - auto chars_column = create_chars_child_column(bytes, stream, mr); - - // Execute the function fn again to fill the chars column. - // Note that if the output chars column has zero size, the function fn should not be called to - // avoid accidentally overwriting the offsets. - if (bytes > 0) { - fn.d_chars = chars_column->mutable_view().template data(); - for_each_fn(fn); - } - - return std::pair(std::move(offsets_column), std::move(chars_column)); -} - } // namespace std::unique_ptr repeat_strings(strings_column_view const& input, column_view const& repeat_times, - std::optional output_strings_sizes, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(input.size() == repeat_times.size(), "The input columns must have the same size."); CUDF_EXPECTS(cudf::is_index_type(repeat_times.type()), "repeat_strings expects an integer type for the `repeat_times` input column."); - if (output_strings_sizes.has_value()) { - auto const output_sizes = output_strings_sizes.value(); - CUDF_EXPECTS(input.size() == output_sizes.size() && - (!output_sizes.nullable() || !output_sizes.has_nulls()), - "The given column of output string sizes is invalid."); - } auto const strings_count = input.size(); if (strings_count == 0) { return 
make_empty_column(type_id::STRING); } auto const strings_dv_ptr = column_device_view::create(input.parent(), stream); auto const repeat_times_dv_ptr = column_device_view::create(repeat_times, stream); - auto const strings_has_nulls = input.has_nulls(); - auto const rtimes_has_nulls = repeat_times.has_nulls(); auto const repeat_times_iter = cudf::detail::indexalator_factory::make_input_iterator(repeat_times); - auto const fn = compute_size_and_repeat_separately_fn{ - *strings_dv_ptr, *repeat_times_dv_ptr, repeat_times_iter, strings_has_nulls, rtimes_has_nulls}; - - auto [offsets_column, chars_column] = - make_strings_children(fn, strings_count, strings_count, output_strings_sizes, stream, mr); - - // We generate new bitmask by AND of the input columns' bitmasks. - // Note that if the input columns are nullable, the output column will also be nullable (which may - // not have nulls). + auto const fn = + compute_sizes_and_repeat_fn{*strings_dv_ptr, + *repeat_times_dv_ptr, + repeat_times_iter, + input.has_nulls(), + repeat_times.has_nulls()}; + + auto [offsets_column, chars_column] = make_strings_children(fn, strings_count, stream, mr); + + // We generate new bitmask by AND of the two input columns' bitmasks. + // Note that if either of the input columns are nullable, the output column will also be nullable + // but may not have nulls. 
auto [null_mask, null_count] = cudf::detail::bitmask_and(table_view{{input.parent(), repeat_times}}, stream, mr); @@ -338,52 +255,6 @@ std::unique_ptr repeat_strings(strings_column_view const& input, null_count, std::move(null_mask)); } - -std::pair, int64_t> repeat_strings_output_sizes( - strings_column_view const& input, - column_view const& repeat_times, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(input.size() == repeat_times.size(), "The input columns must have the same size."); - CUDF_EXPECTS( - cudf::is_index_type(repeat_times.type()), - "repeat_strings_output_sizes expects an integer type for the `repeat_times` input column."); - - auto const strings_count = input.size(); - if (strings_count == 0) { - return std::pair(make_empty_column(type_to_id()), int64_t{0}); - } - - auto output_sizes = make_numeric_column( - data_type{type_to_id()}, strings_count, mask_state::UNALLOCATED, stream, mr); - - auto const strings_dv_ptr = column_device_view::create(input.parent(), stream); - auto const repeat_times_dv_ptr = column_device_view::create(repeat_times, stream); - auto const strings_has_nulls = input.has_nulls(); - auto const rtimes_has_nulls = repeat_times.has_nulls(); - auto const repeat_times_iter = - cudf::detail::indexalator_factory::make_input_iterator(repeat_times); - - auto const fn = compute_size_and_repeat_separately_fn{ - *strings_dv_ptr, - *repeat_times_dv_ptr, - repeat_times_iter, - strings_has_nulls, - rtimes_has_nulls, - output_sizes->mutable_view().template begin()}; - - auto const total_bytes = - thrust::transform_reduce(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - fn, - int64_t{0}, - thrust::plus{}); - - return std::pair(std::move(output_sizes), total_bytes); -} - } // namespace detail std::unique_ptr repeat_string(string_scalar const& input, @@ -404,21 +275,10 @@ std::unique_ptr repeat_strings(strings_column_view const& input, 
std::unique_ptr repeat_strings(strings_column_view const& input, column_view const& repeat_times, - std::optional output_strings_sizes, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat_strings( - input, repeat_times, output_strings_sizes, cudf::get_default_stream(), mr); -} - -std::pair, int64_t> repeat_strings_output_sizes( - strings_column_view const& input, - column_view const& repeat_times, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::repeat_strings_output_sizes(input, repeat_times, cudf::get_default_stream(), mr); + return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/tests/strings/repeat_strings_tests.cpp b/cpp/tests/strings/repeat_strings_tests.cpp index 69d0494c253..e75409d9f39 100644 --- a/cpp/tests/strings/repeat_strings_tests.cpp +++ b/cpp/tests/strings/repeat_strings_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -207,20 +207,6 @@ TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesInvalidInput) EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times), cudf::logic_error); } - // Sizes mismatched between strings column and output_strings_sizes column. - { - auto const repeat_times = int32s_col{1, 2}; - auto const sizes = int32s_col{1, 2, 3, 4, 5}; - EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times, sizes), cudf::logic_error); - } - - // output_strings_sizes column has nulls. - { - auto const repeat_times = int32s_col{1, 2}; - auto const sizes = int32s_col{{null, 2}, null_at(0)}; - EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times, sizes), cudf::logic_error); - } - // Invalid data type for repeat_times column. 
{ auto const repeat_times = cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6}; @@ -243,11 +229,7 @@ TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesOverflowOutput) auto const repeat_times = int32s_col{half_max, half_max, half_max, half_max, half_max, half_max, half_max}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - (void)sizes; - auto const expected_bytes = static_cast(half_max) * int64_t{1 + 2 + 3 + 4 + 5 + 6 + 7}; - EXPECT_EQ(expected_bytes, total_bytes); + EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times), cudf::logic_error); } TYPED_TEST(RepeatStringsTypedTest, StringsColumnNoNullWithScalarRepeatTimes) @@ -301,15 +283,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnNoNullWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 12, 27, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(45, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // repeat_times column has nulls. 
@@ -320,15 +293,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnNoNullWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 27, 12, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(45, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } } @@ -377,15 +341,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnNoNullWithColumnRepeatTime auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 12, 27}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(45, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the middle of the column. 
@@ -397,15 +352,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnNoNullWithColumnRepeatTime auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{12, 27}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(39, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the second half of the column. @@ -417,15 +363,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnNoNullWithColumnRepeatTime auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{27, 12, 12}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(51, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } } @@ -520,15 +457,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnWithNullsWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 18, 0, 0, 0, 12, 12, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(48, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // repeat_times column has nulls. @@ -549,15 +477,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnWithNullsWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 0, 0, 0, 0, 12, 0, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(18, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } } @@ -631,15 +550,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(6, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the middle of the column. 
@@ -652,15 +562,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{0, 0, 0, 0, 12}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(12, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the second half of the column, output has nulls. @@ -672,15 +573,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{12, 0, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(12, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the second half of the column, output does not have null. 
@@ -693,14 +585,5 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(0, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_strs, *results, verbosity); } } From 476d5bbf9cfdbcef024bdccc29f30cd1c6fdbc94 Mon Sep 17 00:00:00 2001 From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> Date: Wed, 8 Feb 2023 10:02:08 -0600 Subject: [PATCH 08/24] Handle parquet list data corner case (#12698) Fixes an issue with a particular arrangement of page data related to lists. Specifically, it is possible for page `N` to contain "0" rows because the values for the row it is a part of start on page `N-1` and end on page `N+1`. This was defeating logic in the decode kernel that would erroneously cause these values to be skipped. Similar to https://github.com/rapidsai/cudf/pull/12488 this is only reproducible with data out in the wild. In this case, we have a file that we could in theory check in to create a test with, but it is 16 MB so it's fairly large. Looking for feedback on whether this is too big. 
Authors: - https://github.com/nvdbaranec - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Bradley Dice (https://github.com/bdice) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/12698 --- cpp/src/io/parquet/page_data.cu | 50 +++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 23d130e1585..ee115e7432a 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -104,20 +104,41 @@ struct page_state_s { * specified row bounds * * @param s The page to be checked - * @param min_row The starting row index + * @param start_row The starting row index * @param num_rows The number of rows * * @return True if the page spans the beginning or the end of the row bounds */ -inline __device__ bool is_bounds_page(page_state_s* const s, size_t min_row, size_t num_rows) +inline __device__ bool is_bounds_page(page_state_s* const s, size_t start_row, size_t num_rows) { size_t const page_begin = s->col.start_row + s->page.chunk_row; size_t const page_end = page_begin + s->page.num_rows; - size_t const begin = min_row; - size_t const end = min_row + num_rows; + size_t const begin = start_row; + size_t const end = start_row + num_rows; + return ((page_begin <= begin && page_end >= begin) || (page_begin <= end && page_end >= end)); } +/** + * @brief Returns whether or not a page is completely contained within the specified + * row bounds + * + * @param s The page to be checked + * @param start_row The starting row index + * @param num_rows The number of rows + * + * @return True if the page is completely contained within the row bounds + */ +inline __device__ bool is_page_contained(page_state_s* const s, size_t start_row, size_t num_rows) +{ + size_t const page_begin = s->col.start_row + s->page.chunk_row; + size_t const page_end = page_begin + 
s->page.num_rows; + size_t const begin = start_row; + size_t const end = start_row + num_rows; + + return page_begin >= begin && page_end <= end; +} + /** * @brief Read a 32-bit varint integer * @@ -1728,10 +1749,11 @@ __global__ void __launch_bounds__(block_size) auto const thread_depth = depth + t; if (thread_depth < s->page.num_output_nesting_levels) { // if we are not a bounding page (as checked above) then we are either - // returning 0 rows from the page (completely outside the bounds) or all - // rows in the page (completely within the bounds) + // returning all rows/values from this page, or 0 of them pp->nesting[thread_depth].batch_size = - s->num_rows == 0 ? 0 : pp->nesting[thread_depth].size; + (s->num_rows == 0 && !is_page_contained(s, min_row, num_rows)) + ? 0 + : pp->nesting[thread_depth].size; } depth += blockDim.x; } @@ -1838,7 +1860,19 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; // if we have no work to do (eg, in a skip_rows/num_rows case) in this page. - if (s->num_rows == 0 && !(has_repetition && is_bounds_page(s, min_row, num_rows))) { return; } + // + // corner case: in the case of lists, we can have pages that contain "0" rows if the current row + // starts before this page and ends after this page: + // P0 P1 P2 + // |---------|---------|----------| + // ^------------------^ + // row start row end + // P1 will contain 0 rows + // + if (s->num_rows == 0 && !(has_repetition && (is_bounds_page(s, min_row, num_rows) || + is_page_contained(s, min_row, num_rows)))) { + return; + } if (s->dict_base) { out_thread0 = (s->dict_bits > 0) ? 64 : 32; From 89ec635dceacde2b6715af253029ef317905df4e Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Wed, 8 Feb 2023 12:17:50 -0500 Subject: [PATCH 09/24] Update shared workflow branches (#12733) This PR updates the branch reference used for our shared workflows. 
Authors: - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/12733 --- .github/workflows/build.yaml | 14 +++++++------- .github/workflows/pr.yaml | 26 +++++++++++++------------- .github/workflows/test.yaml | 14 +++++++------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 3366554db30..26d07515f70 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: skip_upload_pkgs: libcudf-example wheel-build-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ 
inputs.branch }} @@ -67,7 +67,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -77,7 +77,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-publish.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-publish.yml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index cf20b0006a2..f33fc15c52f 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,32 +25,32 @@ jobs: - wheel-build-dask-cudf - wheel-tests-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.04 checks: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04 conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04 with: 
build_type: pull-request conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04 with: build_type: pull-request conda-python-cudf-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 with: build_type: pull-request test_script: "ci/test_python_cudf.sh" @@ -58,14 +58,14 @@ jobs: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 with: build_type: pull-request test_script: "ci/test_python_other.sh" conda-java-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 with: build_type: pull-request node_type: "gpu-latest-1" @@ -75,7 +75,7 @@ jobs: conda-notebook-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 with: build_type: pull-request node_type: "gpu-latest-1" @@ -85,7 +85,7 @@ jobs: wheel-build-cudf: needs: checks secrets: 
inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 with: build_type: pull-request package-name: cudf @@ -94,7 +94,7 @@ jobs: wheel-tests-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 with: build_type: pull-request package-name: cudf @@ -106,7 +106,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-tests-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.04 with: build_type: pull-request package-name: dask_cudf @@ -115,7 +115,7 @@ jobs: wheel-tests-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04 with: build_type: pull-request package-name: dask_cudf diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1b117bb2f4f..ff19d51f8ef 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02 + uses: 
rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -43,7 +43,7 @@ jobs: test_script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -67,7 +67,7 @@ jobs: run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -78,7 +78,7 @@ jobs: test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} From d3f9dafa49c973c5e5d8b8a9336bbc92555ea0c3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller 
<53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 8 Feb 2023 11:41:10 -0600 Subject: [PATCH 10/24] Fix faulty conditional logic in JIT `GroupBy.apply` (#12706) Closes https://github.com/rapidsai/cudf/issues/12686 Authors: - https://github.com/brandon-b-miller Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/12706 --- python/cudf/cudf/tests/test_groupby.py | 17 +++++++++++++++++ python/cudf/udf_cpp/groupby/function.cu | 6 +++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index c5b330fd89c..1fea3c7a37e 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -519,6 +519,23 @@ def test_groupby_apply_jit_args(func, args, groupby_jit_data): run_groupby_apply_jit_test(groupby_jit_data, func, ["key1", "key2"], *args) +def test_groupby_apply_jit_block_divergence(): + # https://github.com/rapidsai/cudf/issues/12686 + df = cudf.DataFrame( + { + "a": [0, 0, 0, 1, 1, 1], + "b": [1, 1, 1, 2, 3, 4], + } + ) + + def diverging_block(grp_df): + if grp_df["a"].mean() > 0: + return grp_df["b"].mean() + return 0 + + run_groupby_apply_jit_test(df, diverging_block, ["a"]) + + @pytest.mark.parametrize("nelem", [2, 3, 100, 500, 1000]) @pytest.mark.parametrize( "func", diff --git a/python/cudf/udf_cpp/groupby/function.cu b/python/cudf/udf_cpp/groupby/function.cu index f94f99c4b49..782371b8a44 100644 --- a/python/cudf/udf_cpp/groupby/function.cu +++ b/python/cudf/udf_cpp/groupby/function.cu @@ -284,7 +284,7 @@ extern "C" { __device__ int name##_##cname(return_type* numba_return_value, type* const data, int64_t size) \ { \ return_type const res = name(data, size); \ - if (threadIdx.x == 0) { *numba_return_value = res; } \ + *numba_return_value = res; \ __syncthreads(); \ return 0; \ } @@ -309,8 +309,8 @@ extern "C" { __device__ int 
name##_##cname( \ int64_t* numba_return_value, type* const data, int64_t* index, int64_t size) \ { \ - auto const res = name(data, index, size); \ - if (threadIdx.x == 0) { *numba_return_value = res; } \ + auto const res = name(data, index, size); \ + *numba_return_value = res; \ __syncthreads(); \ return 0; \ } From 0161ba896a1d70ba3e049bdbb3d649cedba2aeb0 Mon Sep 17 00:00:00 2001 From: Cindy Jiang <47068112+cindyyuanjiang@users.noreply.github.com> Date: Wed, 8 Feb 2023 10:48:38 -0800 Subject: [PATCH 11/24] Add `regex_program` strings replacing java APIs and tests (#12701) This PR adds [replace_re, replace_with_backrefs](https://docs.rapids.ai/api/libcudf/nightly/replace__re_8hpp.html) related `regex_program` java APIs and unit tests. Part of work for https://github.com/NVIDIA/spark-rapids/issues/7295. Authors: - Cindy Jiang (https://github.com/cindyyuanjiang) Approvers: - Jason Lowe (https://github.com/jlowe) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12701 --- .../main/java/ai/rapids/cudf/ColumnView.java | 71 +++++++++++--- java/src/main/native/src/ColumnViewJni.cpp | 43 ++++---- .../java/ai/rapids/cudf/ColumnVectorTest.java | 98 ++++++++++++------- 3 files changed, 149 insertions(+), 63 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 2d0bf28225f..0cb9ed37d9f 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2922,8 +2922,21 @@ public final ColumnVector stringReplace(Scalar target, Scalar replace) { * @param repl The string scalar to replace for each pattern match. * @return A new column vector containing the string results. 
*/ + @Deprecated public final ColumnVector replaceRegex(String pattern, Scalar repl) { - return replaceRegex(pattern, repl, -1); + return replaceRegex(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), repl); + } + + /** + * For each string, replaces any character sequence matching the given regex program pattern + * using the replacement string scalar. + * + * @param regexProg The regex program with pattern to search within each string. + * @param repl The string scalar to replace for each pattern match. + * @return A new column vector containing the string results. + */ + public final ColumnVector replaceRegex(RegexProgram regexProg, Scalar repl) { + return replaceRegex(regexProg, repl, -1); } /** @@ -2935,12 +2948,27 @@ public final ColumnVector replaceRegex(String pattern, Scalar repl) { * @param maxRepl The maximum number of times a replacement should occur within each string. * @return A new column vector containing the string results. */ + @Deprecated public final ColumnVector replaceRegex(String pattern, Scalar repl, int maxRepl) { + return replaceRegex(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), repl, maxRepl); + } + + /** + * For each string, replaces any character sequence matching the given regex program pattern + * using the replacement string scalar. + * + * @param regexProg The regex program with pattern to search within each string. + * @param repl The string scalar to replace for each pattern match. + * @param maxRepl The maximum number of times a replacement should occur within each string. + * @return A new column vector containing the string results. 
+ */ + public final ColumnVector replaceRegex(RegexProgram regexProg, Scalar repl, int maxRepl) { if (!repl.getType().equals(DType.STRING)) { throw new IllegalArgumentException("Replacement must be a string scalar"); } - return new ColumnVector(replaceRegex(getNativeView(), pattern, repl.getScalarHandle(), - maxRepl)); + assert regexProg != null : "regex program may not be null"; + return new ColumnVector(replaceRegex(getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, repl.getScalarHandle(), maxRepl)); } /** @@ -2966,9 +2994,26 @@ public final ColumnVector replaceMultiRegex(String[] patterns, ColumnView repls) * @param replace The replacement template for creating the output string. * @return A new java column vector containing the string results. */ + @Deprecated public final ColumnVector stringReplaceWithBackrefs(String pattern, String replace) { - return new ColumnVector(stringReplaceWithBackrefs(getNativeView(), pattern, - replace)); + return stringReplaceWithBackrefs(new RegexProgram(pattern), replace); + } + + /** + * For each string, replaces any character sequence matching the given regex program + * pattern using the replace template for back-references. + * + * Any null string entries return corresponding null output column entries. + * + * @param regexProg The regex program with pattern to search within each string. + * @param replace The replacement template for creating the output string. + * @return A new java column vector containing the string results. 
+ */ + public final ColumnVector stringReplaceWithBackrefs(RegexProgram regexProg, String replace) { + assert regexProg != null : "regex program may not be null"; + return new ColumnVector( + stringReplaceWithBackrefs(getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, replace)); } /** @@ -4129,12 +4174,14 @@ private static native long substringColumn(long columnView, long startColumn, lo * Native method for replacing each regular expression pattern match with the specified * replacement string. * @param columnView native handle of the cudf::column_view being operated on. - * @param pattern The regular expression pattern to search within each string. + * @param pattern regular expression pattern to search within each string. + * @param flags regex flags setting. + * @param capture capture groups setting. * @param repl native handle of the cudf::scalar containing the replacement string. * @param maxRepl maximum number of times to replace the pattern within a string * @return native handle of the resulting cudf column containing the string results. */ - private static native long replaceRegex(long columnView, String pattern, + private static native long replaceRegex(long columnView, String pattern, int flags, int capture, long repl, long maxRepl) throws CudfException; /** @@ -4148,15 +4195,17 @@ private static native long replaceMultiRegex(long columnView, String[] patterns, long repls) throws CudfException; /** - * Native method for replacing any character sequence matching the given pattern - * using the replace template for back-references. + * Native method for replacing any character sequence matching the given regex program + * pattern using the replace template for back-references. * @param columnView native handle of the cudf::column_view being operated on. * @param pattern The regular expression patterns to search within each string. + * @param flags Regex flags setting. + * @param capture Capture groups setting. 
* @param replace The replacement template for creating the output string. * @return native handle of the resulting cudf column containing the string results. */ - private static native long stringReplaceWithBackrefs(long columnView, String pattern, - String replace) throws CudfException; + private static native long stringReplaceWithBackrefs(long columnView, String pattern, int flags, + int capture, String replace) throws CudfException; /** * Native method for checking if strings in a column starts with a specified comparison string. diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 958efd364ed..c42cc430560 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1606,21 +1606,24 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapContains(JNIEnv *env, CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceRegex(JNIEnv *env, jclass, - jlong j_column_view, - jstring j_pattern, jlong j_repl, - jlong j_maxrepl) { +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceRegex( + JNIEnv *env, jclass, jlong j_column_view, jstring j_pattern, jint regex_flags, + jint capture_groups, jlong j_repl, jlong j_maxrepl) { JNI_NULL_CHECK(env, j_column_view, "column is null", 0); JNI_NULL_CHECK(env, j_pattern, "pattern string is null", 0); JNI_NULL_CHECK(env, j_repl, "replace scalar is null", 0); try { cudf::jni::auto_set_device(env); - auto cv = reinterpret_cast(j_column_view); - cudf::strings_column_view scv(*cv); - cudf::jni::native_jstring pattern(env, j_pattern); - auto repl = reinterpret_cast(j_repl); - return release_as_jlong(cudf::strings::replace_re(scv, pattern.get(), *repl, j_maxrepl)); + auto const cv = reinterpret_cast(j_column_view); + auto const strings_column = cudf::strings_column_view{*cv}; + auto const pattern = cudf::jni::native_jstring(env, j_pattern); + auto const flags = static_cast(regex_flags); + auto const 
groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern.get(), flags, groups); + auto const repl = reinterpret_cast(j_repl); + return release_as_jlong( + cudf::strings::replace_re(strings_column, *regex_prog, *repl, j_maxrepl)); } CATCH_STD(env, 0); } @@ -1646,19 +1649,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceMultiRegex(JNIEnv } JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringReplaceWithBackrefs( - JNIEnv *env, jclass, jlong column_view, jstring patternObj, jstring replaceObj) { + JNIEnv *env, jclass, jlong j_column_view, jstring pattern_obj, jint regex_flags, + jint capture_groups, jstring replace_obj) { - JNI_NULL_CHECK(env, column_view, "column is null", 0); - JNI_NULL_CHECK(env, patternObj, "pattern string is null", 0); - JNI_NULL_CHECK(env, replaceObj, "replace string is null", 0); + JNI_NULL_CHECK(env, j_column_view, "column is null", 0); + JNI_NULL_CHECK(env, pattern_obj, "pattern string is null", 0); + JNI_NULL_CHECK(env, replace_obj, "replace string is null", 0); try { cudf::jni::auto_set_device(env); - cudf::column_view *cv = reinterpret_cast(column_view); - cudf::strings_column_view scv(*cv); - cudf::jni::native_jstring ss_pattern(env, patternObj); - cudf::jni::native_jstring ss_replace(env, replaceObj); + auto const cv = reinterpret_cast(j_column_view); + auto const strings_column = cudf::strings_column_view{*cv}; + auto const pattern = cudf::jni::native_jstring(env, pattern_obj); + auto const flags = static_cast(regex_flags); + auto const groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern.get(), flags, groups); + cudf::jni::native_jstring ss_replace(env, replace_obj); return release_as_jlong( - cudf::strings::replace_with_backrefs(scv, ss_pattern.get(), ss_replace.get())); + cudf::strings::replace_with_backrefs(strings_column, *regex_prog, ss_replace.get())); } CATCH_STD(env, 0); } diff --git 
a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index ab4baf74277..db64dcb08c7 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -5147,29 +5147,42 @@ void teststringReplaceThrowsException() { @Test void testReplaceRegex() { - try (ColumnVector v = - ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); - Scalar repl = Scalar.fromString("Repl"); - ColumnVector actual = v.replaceRegex("[tT]itle", repl); - ColumnVector expected = - ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { - assertColumnsAreEqual(expected, actual); - } + try (ColumnVector v = ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); + Scalar repl = Scalar.fromString("Repl")) { + String pattern = "[tT]itle"; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); - try (ColumnVector v = - ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); - Scalar repl = Scalar.fromString("Repl"); - ColumnVector actual = v.replaceRegex("[tT]itle", repl, 0)) { - assertColumnsAreEqual(v, actual); - } + try (ColumnVector actual = v.replaceRegex(pattern, repl); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } - try (ColumnVector v = - ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); - Scalar repl = Scalar.fromString("Repl"); - ColumnVector actual = v.replaceRegex("[tT]itle", repl, 1); - ColumnVector expected = - ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { - assertColumnsAreEqual(expected, actual); + try (ColumnVector actual = v.replaceRegex(pattern, repl, 0)) { + assertColumnsAreEqual(v, actual); + } + + try (ColumnVector actual = v.replaceRegex(pattern, 
repl, 1); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } + + try (ColumnVector actual = v.replaceRegex(regexProg, repl); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } + + try (ColumnVector actual = v.replaceRegex(regexProg, repl, 0)) { + assertColumnsAreEqual(v, actual); + } + + try (ColumnVector actual = v.replaceRegex(regexProg, repl, 1); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } } } @@ -5188,45 +5201,55 @@ void testReplaceMultiRegex() { @Test void testStringReplaceWithBackrefs() { - try (ColumnVector v = ColumnVector.fromStrings("

title

", "

another title

", - null); + try (ColumnVector v = ColumnVector.fromStrings("

title

", "

another title

", null); ColumnVector expected = ColumnVector.fromStrings("

title

", "

another title

", null); - ColumnVector actual = v.stringReplaceWithBackrefs("

(.*)

", "

\\1

")) { + ColumnVector actual = v.stringReplaceWithBackrefs("

(.*)

", "

\\1

"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("

(.*)

"), "

\\1

")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } try (ColumnVector v = ColumnVector.fromStrings("2020-1-01", "2020-2-02", null); ColumnVector expected = ColumnVector.fromStrings("2020-01-01", "2020-02-02", null); - ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])-", "-0\\1-")) { + ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])-", "-0\\1-"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("-([0-9])-"), "-0\\1-")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } - try (ColumnVector v = ColumnVector.fromStrings("2020-01-1", "2020-02-2", - "2020-03-3invalid", null); + try (ColumnVector v = ColumnVector.fromStrings("2020-01-1", "2020-02-2", "2020-03-3invalid", null); ColumnVector expected = ColumnVector.fromStrings("2020-01-01", "2020-02-02", "2020-03-3invalid", null); - ColumnVector actual = v.stringReplaceWithBackrefs( - "-([0-9])$", "-0\\1")) { + ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])$", "-0\\1"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("-([0-9])$"), "-0\\1")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } try (ColumnVector v = ColumnVector.fromStrings("2020-01-1 random_text", "2020-02-2T12:34:56", - "2020-03-3invalid", null); + "2020-03-3invalid", null); ColumnVector expected = ColumnVector.fromStrings("2020-01-01 random_text", "2020-02-02T12:34:56", "2020-03-3invalid", null); - ColumnVector actual = v.stringReplaceWithBackrefs( - "-([0-9])([ T])", "-0\\1\\2")) { + ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])([ T])", "-0\\1\\2"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("-([0-9])([ T])"), "-0\\1\\2")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } // test zero as group index try (ColumnVector v = ColumnVector.fromStrings("aa-11 b2b-345", "aa-11a 
1c-2b2 b2-c3", "11-aa", null); ColumnVector expected = ColumnVector.fromStrings("aa-11:aa:11; b2b-345:b:345;", "aa-11:aa:11;a 1c-2:c:2;b2 b2-c3", "11-aa", null); - ColumnVector actual = v.stringReplaceWithBackrefs( - "([a-z]+)-([0-9]+)", "${0}:${1}:${2};")) { + ColumnVector actual = v.stringReplaceWithBackrefs("([a-z]+)-([0-9]+)", "${0}:${1}:${2};"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("([a-z]+)-([0-9]+)"), "${0}:${1}:${2};")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } // group index exceeds group count @@ -5236,6 +5259,13 @@ void testStringReplaceWithBackrefs() { } }); + // group index exceeds group count + assertThrows(CudfException.class, () -> { + try (ColumnVector v = ColumnVector.fromStrings("ABC123defgh"); + ColumnVector r = + v.stringReplaceWithBackrefs(new RegexProgram("([A-Z]+)([0-9]+)([a-z]+)"), "\\4")) { + } + }); } @Test From c20c8b42215e38bee207b49dad6e28ea04ccbd8c Mon Sep 17 00:00:00 2001 From: Sevag H Date: Wed, 8 Feb 2023 16:50:48 -0500 Subject: [PATCH 12/24] Bump pinned rapids wheel deps to 23.4 (#12735) We introduced a change to pin RAPIDS wheel dependencies to the same release version. However, branch 23.04 was created before that last PR was merged, so as of now cudf's 23.4 wheels are installing 23.2 RAPIDS dependencies. This PR updates those pins to the current release. Authors: - Sevag H (https://github.com/sevagh) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/12735 --- python/cudf/setup.py | 2 +- python/dask_cudf/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 48199d83478..88bc2cfae28 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -21,7 +21,7 @@ "typing_extensions", # Allow floating minor versions for Arrow. 
"pyarrow==10", - f"rmm{cuda_suffix}==23.2.*", + f"rmm{cuda_suffix}==23.4.*", f"ptxcompiler{cuda_suffix}", f"cubinlinker{cuda_suffix}", "cupy-cuda11x", diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 4b420b1b97c..be4c704019d 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -13,7 +13,7 @@ "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.6.0dev0", - f"cudf{cuda_suffix}==23.2.*", + f"cudf{cuda_suffix}==23.4.*", "cupy-cuda11x", ] From 3e4ff2afb8a5ca7f1ce051f2d86945688dfb21f9 Mon Sep 17 00:00:00 2001 From: Ajay Thorve Date: Wed, 8 Feb 2023 16:25:28 -0800 Subject: [PATCH 13/24] Reduce error handling verbosity in CI tests scripts (#12738) This PR adds a less verbose [trap method](https://github.com/rapidsai/cugraph/blob/f2b081075704aabc789603e14ce552eac3fbe692/ci/test.sh#L19), for error handling to help ensure that we capture all potential error codes in our test scripts, and works as follows: - setting an environment variable, EXITCODE, with a default value of 0 - setting a trap statement triggered by ERR signals which will set EXITCODE=1 when any commands return a non-zero exit code cc @ajschmidt8 Authors: - Ajay Thorve (https://github.com/AjayThorve) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12738 --- ci/test_cpp.sh | 12 ++++-------- ci/test_java.sh | 13 ++++--------- ci/test_notebooks.sh | 9 ++++----- ci/test_python_common.sh | 3 +-- ci/test_python_cudf.sh | 25 +++++-------------------- ci/test_python_other.sh | 31 +++++-------------------------- 6 files changed, 23 insertions(+), 70 deletions(-) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 5b1e8aa398c..b3d7919b279 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -21,7 +21,6 @@ set -u CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/ mkdir -p "${RAPIDS_TESTS_DIR}" 
-SUITEERROR=0 rapids-print-env @@ -32,6 +31,8 @@ rapids-mamba-retry install \ rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e # TODO: Disabling stream identification for now. @@ -61,12 +62,6 @@ for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do #else # GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} #fi - - exitcode=$? - if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: GTest ${gt}" - fi done if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then @@ -85,4 +80,5 @@ if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then # TODO: test-results/*.cs.log are processed in gpuci fi -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_java.sh b/ci/test_java.sh index 27a1f2aa46f..f905aaa1178 100755 --- a/ci/test_java.sh +++ b/ci/test_java.sh @@ -29,22 +29,17 @@ rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ libcudf -SUITEERROR=0 - rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e rapids-logger "Run Java tests" pushd java mvn test -B -DCUDF_JNI_ARROW_STATIC=OFF -DCUDF_JNI_ENABLE_PROFILING=OFF -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf Java" -fi popd -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh index f1e17162195..7f5f35219b0 100755 --- a/ci/test_notebooks.sh +++ b/ci/test_notebooks.sh @@ -36,9 +36,8 @@ pushd notebooks # (space-separated list of filenames without paths) SKIPNBS="" -# Set SUITEERROR to failure if any run fails -SUITEERROR=0 - +EXITCODE=0 +trap "EXITCODE=1" ERR set +e for nb in $(find . -name "*.ipynb"); do nbBasename=$(basename ${nb}) @@ -55,8 +54,8 @@ for nb in $(find . 
-name "*.ipynb"); do else nvidia-smi ${NBTEST} ${nbBasename} - SUITEERROR=$((SUITEERROR | $?)) fi done -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh index 107540c0192..0e922c105dd 100755 --- a/ci/test_python_common.sh +++ b/ci/test_python_common.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Common setup steps shared by Python test jobs @@ -27,7 +27,6 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" -SUITEERROR=0 rapids-print-env diff --git a/ci/test_python_cudf.sh b/ci/test_python_cudf.sh index bea162a9318..337ef38cf97 100755 --- a/ci/test_python_cudf.sh +++ b/ci/test_python_cudf.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Common setup steps shared by Python test jobs source "$(dirname "$0")/test_python_common.sh" @@ -7,6 +7,8 @@ source "$(dirname "$0")/test_python_common.sh" rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e rapids-logger "pytest cudf" @@ -24,12 +26,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-coverage.xml" \ --cov-report=term \ tests -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf" -fi popd # Run benchmarks with both cudf and pandas to ensure compatibility is maintained. @@ -48,12 +44,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-benchmark-coverage.xml" \ --cov-report=term \ benchmarks -exitcode=$? 
- -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf" -fi rapids-logger "pytest for cudf benchmarks using pandas" CUDF_BENCHMARKS_USE_PANDAS=ON \ @@ -67,12 +57,7 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-benchmark-pandas-coverage.xml" \ --cov-report=term \ benchmarks -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf" -fi popd -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh index d7a5e288193..b79cd44cdbe 100755 --- a/ci/test_python_other.sh +++ b/ci/test_python_other.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Common setup steps shared by Python test jobs source "$(dirname "$0")/test_python_common.sh" @@ -12,6 +12,8 @@ rapids-mamba-retry install \ rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e rapids-logger "pytest dask_cudf" @@ -26,12 +28,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \ --cov-report=term \ dask_cudf -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in dask-cudf" -fi popd rapids-logger "pytest custreamz" @@ -46,12 +42,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \ --cov-report=term \ custreamz -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in custreamz" -fi popd set -e @@ -73,12 +63,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/strings-udf-coverage.xml" \ --cov-report=term \ tests -exitcode=$? 
- -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in strings_udf" -fi popd rapids-logger "pytest cudf with strings_udf" @@ -94,12 +78,7 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-strings-udf-coverage.xml" \ --cov-report=term \ tests/test_udf_masked_ops.py -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf with strings_udf" -fi popd -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} From 74efb790cfbf5f9563ff88f9c835dc03570605f9 Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Thu, 9 Feb 2023 10:20:45 +0800 Subject: [PATCH 14/24] Allow setting the seed argument for hash partition (#12715) This PR is exposing the `seed` parameter for the JNI hash partition APIs to support customizing the hash algorithm seed. The existing tests should cover this change. Authors: - Liangcai Li (https://github.com/firestarman) Approvers: - Nghia Truong (https://github.com/ttnghia) - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/12715 --- java/src/main/java/ai/rapids/cudf/Table.java | 16 ++++++++++++++++ java/src/main/native/src/TableJni.cpp | 7 ++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 3eed7e45eed..3ccab70ccda 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -194,6 +194,7 @@ private static native long[] hashPartition(long inputTable, int[] columnsToHash, int hashTypeId, int numberOfPartitions, + int seed, int[] outputOffsets) throws CudfException; private static native long[] roundRobinPartition(long inputTable, @@ -4253,12 +4254,27 @@ public PartitionedTable hashPartition(int numberOfPartitions) { * {@link Table} class */ public PartitionedTable hashPartition(HashType 
type, int numberOfPartitions) { + final int DEFAULT_HASH_SEED = 0; + return hashPartition(type, numberOfPartitions, DEFAULT_HASH_SEED); + } + + /** + * Hash partition a table into the specified number of partitions. + * @param type the type of hash to use. Depending on the type of hash different restrictions + * on the hash column(s) may exist. Not all hash functions are guaranteed to work + * besides IDENTITY and MURMUR3. + * @param numberOfPartitions number of partitions to use + * @param seed the seed value for hashing + * @return Table that exposes a limited functionality of the {@link Table} class + */ + public PartitionedTable hashPartition(HashType type, int numberOfPartitions, int seed) { int[] partitionOffsets = new int[numberOfPartitions]; return new PartitionedTable(new Table(Table.hashPartition( operation.table.nativeHandle, operation.indices, type.nativeId, partitionOffsets.length, + seed, partitionOffsets)), partitionOffsets); } } diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 3d730ff61a1..0b3ccb59a39 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -2655,7 +2655,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_partition(JNIEnv *env, jc JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition( JNIEnv *env, jclass, jlong input_table, jintArray columns_to_hash, jint hash_function, - jint number_of_partitions, jintArray output_offsets) { + jint number_of_partitions, jint seed, jintArray output_offsets) { JNI_NULL_CHECK(env, input_table, "input table is null", NULL); JNI_NULL_CHECK(env, columns_to_hash, "columns_to_hash is null", NULL); @@ -2665,6 +2665,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition( try { cudf::jni::auto_set_device(env); auto const hash_func = static_cast(hash_function); + auto const hash_seed = static_cast(seed); auto const n_input_table = reinterpret_cast(input_table); 
cudf::jni::native_jintArray n_columns_to_hash(env, columns_to_hash); JNI_ARG_CHECK(env, n_columns_to_hash.size() > 0, "columns_to_hash is zero", NULL); @@ -2672,8 +2673,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition( std::vector columns_to_hash_vec(n_columns_to_hash.begin(), n_columns_to_hash.end()); - auto [partitioned_table, partition_offsets] = - cudf::hash_partition(*n_input_table, columns_to_hash_vec, number_of_partitions, hash_func); + auto [partitioned_table, partition_offsets] = cudf::hash_partition( + *n_input_table, columns_to_hash_vec, number_of_partitions, hash_func, hash_seed); cudf::jni::native_jintArray n_output_offsets(env, output_offsets); std::copy(partition_offsets.begin(), partition_offsets.end(), n_output_offsets.begin()); From b0335f0c928b748b7c107a159cf4bb17c0839bd1 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 9 Feb 2023 13:56:06 +0100 Subject: [PATCH 15/24] `partition_by_hash()`: use `_split()` (#12704) Reduce overhead of `Frame.partition_by_hash()` by calling `Frame._split()`. #### Benchmark Small benchmark of this PR shows ~1.5x speedup: https://gist.github.com/madsbk/308df2dd58309510610fd27e0529f862 ## Authors: - Mads R. B. 
Kristensen (https://github.com/madsbk) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/12704 --- python/cudf/cudf/core/dataframe.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 1ebf59ba6e4..535fe2352aa 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4614,10 +4614,13 @@ def partition_by_hash(self, columns, nparts, keep_index=True): self._column_names, self._index_names if keep_index else None, ) - # Slice into partition - ret = [outdf[s:e] for s, e in zip(offsets, offsets[1:] + [None])] - if not keep_index: - ret = [df.reset_index(drop=True) for df in ret] + # Slice into partitions. Notice, `hash_partition` returns the start + # offset of each partition thus we skip the first offset + ret = outdf._split(offsets[1:], keep_index=keep_index) + + # Calling `_split()` on an empty dataframe returns an empty list + # so we add empty partitions here + ret += [self._empty_like(keep_index) for _ in range(nparts - len(ret))] return ret def info( From 8d17379bd821edc67c3a3f93801d3e79b8e2d97f Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 9 Feb 2023 08:33:46 -0500 Subject: [PATCH 16/24] Fix memcheck read error in compound segmented reduce (#12722) Fixes an out-of-bounds memory read in the compound segmented reduction logic. The number of segments was computed incorrectly causing an extra read passed the end of the valid-counts vector. This was found by running compute-sanitizer test on the reductions gtests as follows: ``` compute-sanitizer --tool memcheck --demangle full gtests/REDUCTIONS_TEST --rmm_mode=cuda ``` The number of segments is 1 less than the number of offsets. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/12722 --- cpp/include/cudf/detail/segmented_reduction.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/detail/segmented_reduction.cuh b/cpp/include/cudf/detail/segmented_reduction.cuh index 9a49c1abe38..1c39d5eab1e 100644 --- a/cpp/include/cudf/detail/segmented_reduction.cuh +++ b/cpp/include/cudf/detail/segmented_reduction.cuh @@ -145,10 +145,10 @@ void segmented_reduce(InputIterator d_in, size_type* d_valid_counts, rmm::cuda_stream_view stream) { - using OutputType = typename thrust::iterator_value::type; - using IntermediateType = typename thrust::iterator_value::type; - auto num_segments = static_cast(std::distance(d_offset_begin, d_offset_end)); - auto const binary_op = op.get_binary_op(); + using OutputType = typename thrust::iterator_value::type; + using IntermediateType = typename thrust::iterator_value::type; + auto num_segments = static_cast(std::distance(d_offset_begin, d_offset_end)) - 1; + auto const binary_op = op.get_binary_op(); auto const initial_value = op.template get_identity(); rmm::device_uvector intermediate_result{static_cast(num_segments), From 3b2682f0a8d3cfe3b106d2922b4a425dda0162e1 Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Thu, 9 Feb 2023 10:08:45 -0500 Subject: [PATCH 17/24] update changelog --- CHANGELOG.md | 242 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 240 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d653d503a1e..4acad48eabf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,244 @@ -# cuDF 23.02.00 (Date TBD) +# cuDF 23.02.00 (9 Feb 2023) -Please see https://github.com/rapidsai/cudf/releases/tag/v23.02.00a for the latest changes to this development branch. 
+## 🚨 Breaking Changes + +- Pin `dask` and `distributed` for release ([#12695](https://github.com/rapidsai/cudf/pull/12695)) [@galipremsagar](https://github.com/galipremsagar) +- Change ways to access `ptr` in `Buffer` ([#12587](https://github.com/rapidsai/cudf/pull/12587)) [@galipremsagar](https://github.com/galipremsagar) +- Remove column names ([#12578](https://github.com/rapidsai/cudf/pull/12578)) [@vuule](https://github.com/vuule) +- Default `cudf::io::read_json` to nested JSON parser ([#12544](https://github.com/rapidsai/cudf/pull/12544)) [@vuule](https://github.com/vuule) +- Switch `engine=cudf` to the new `JSON` reader ([#12509](https://github.com/rapidsai/cudf/pull/12509)) [@galipremsagar](https://github.com/galipremsagar) +- Add trailing comma support for nested JSON reader ([#12448](https://github.com/rapidsai/cudf/pull/12448)) [@karthikeyann](https://github.com/karthikeyann) +- Upgrade to `arrow-10.0.1` ([#12327](https://github.com/rapidsai/cudf/pull/12327)) [@galipremsagar](https://github.com/galipremsagar) +- Fail loudly to avoid data corruption with unsupported input in `read_orc` ([#12325](https://github.com/rapidsai/cudf/pull/12325)) [@vuule](https://github.com/vuule) +- CSV, JSON reader to infer integer column with nulls as int64 instead of float64 ([#12309](https://github.com/rapidsai/cudf/pull/12309)) [@karthikeyann](https://github.com/karthikeyann) +- Remove deprecated code for 23.02 ([#12281](https://github.com/rapidsai/cudf/pull/12281)) [@vyasr](https://github.com/vyasr) +- Null element for parsing error in numeric types in JSON, CSV reader ([#12272](https://github.com/rapidsai/cudf/pull/12272)) [@karthikeyann](https://github.com/karthikeyann) +- Purge non-empty nulls for `superimpose_nulls` and `push_down_nulls` 
([#12239](https://github.com/rapidsai/cudf/pull/12239)) [@ttnghia](https://github.com/ttnghia) +- Rename `cudf::structs::detail::superimpose_parent_nulls` APIs ([#12230](https://github.com/rapidsai/cudf/pull/12230)) [@ttnghia](https://github.com/ttnghia) +- Remove JIT type names, refactor id_to_type. ([#12158](https://github.com/rapidsai/cudf/pull/12158)) [@bdice](https://github.com/bdice) +- Floor division uses integer division for integral arguments ([#12131](https://github.com/rapidsai/cudf/pull/12131)) [@wence-](https://github.com/wence-) + +## 🐛 Bug Fixes + +- Fix a mask data corruption in UDF ([#12647](https://github.com/rapidsai/cudf/pull/12647)) [@galipremsagar](https://github.com/galipremsagar) +- pre-commit: Update isort version to 5.12.0 ([#12645](https://github.com/rapidsai/cudf/pull/12645)) [@wence-](https://github.com/wence-) +- tests: Skip cuInit tests if cuda-gdb is not found or not working ([#12644](https://github.com/rapidsai/cudf/pull/12644)) [@wence-](https://github.com/wence-) +- Revert regex program java APIs and tests ([#12639](https://github.com/rapidsai/cudf/pull/12639)) [@cindyyuanjiang](https://github.com/cindyyuanjiang) +- Fix leaks in ColumnVectorTest ([#12625](https://github.com/rapidsai/cudf/pull/12625)) [@jlowe](https://github.com/jlowe) +- Handle when spillable buffers own each other ([#12607](https://github.com/rapidsai/cudf/pull/12607)) [@madsbk](https://github.com/madsbk) +- Fix incorrect null counts for sliced columns in JCudfSerialization ([#12589](https://github.com/rapidsai/cudf/pull/12589)) [@jlowe](https://github.com/jlowe) +- lists: Transfer dtypes correctly through list.get ([#12586](https://github.com/rapidsai/cudf/pull/12586)) [@wence-](https://github.com/wence-) +- timedelta: Don't go via float intermediates for floordiv 
([#12585](https://github.com/rapidsai/cudf/pull/12585)) [@wence-](https://github.com/wence-) +- Fixing BUG, `get_next_chunk()` should use the blocking function `device_read()` ([#12584](https://github.com/rapidsai/cudf/pull/12584)) [@madsbk](https://github.com/madsbk) +- Make JNI QuoteStyle accessible outside ai.rapids.cudf ([#12572](https://github.com/rapidsai/cudf/pull/12572)) [@mythrocks](https://github.com/mythrocks) +- `partition_by_hash()`: support index ([#12554](https://github.com/rapidsai/cudf/pull/12554)) [@madsbk](https://github.com/madsbk) +- Mixed Join benchmark bug due to wrong conditional column ([#12553](https://github.com/rapidsai/cudf/pull/12553)) [@divyegala](https://github.com/divyegala) +- Update List Lexicographical Comparator ([#12538](https://github.com/rapidsai/cudf/pull/12538)) [@divyegala](https://github.com/divyegala) +- Dynamically read PTX version ([#12534](https://github.com/rapidsai/cudf/pull/12534)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- build.sh switch to use `RAPIDS` magic value ([#12525](https://github.com/rapidsai/cudf/pull/12525)) [@robertmaynard](https://github.com/robertmaynard) +- Loosen runtime arrow pinning ([#12522](https://github.com/rapidsai/cudf/pull/12522)) [@vyasr](https://github.com/vyasr) +- Enable metadata transfer for complex types in transpose ([#12491](https://github.com/rapidsai/cudf/pull/12491)) [@galipremsagar](https://github.com/galipremsagar) +- Fix issues with parquet chunked reader ([#12488](https://github.com/rapidsai/cudf/pull/12488)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix missing metadata transfer in concat for `ListColumn` ([#12487](https://github.com/rapidsai/cudf/pull/12487)) [@galipremsagar](https://github.com/galipremsagar) +- Rename libcudf substring source files to slice 
([#12484](https://github.com/rapidsai/cudf/pull/12484)) [@davidwendt](https://github.com/davidwendt) +- Fix compile issue with arrow 10 ([#12465](https://github.com/rapidsai/cudf/pull/12465)) [@ttnghia](https://github.com/ttnghia) +- Fix List offsets bug in mixed type list column in nested JSON reader ([#12447](https://github.com/rapidsai/cudf/pull/12447)) [@karthikeyann](https://github.com/karthikeyann) +- Fix xfail incompatibilities ([#12423](https://github.com/rapidsai/cudf/pull/12423)) [@vyasr](https://github.com/vyasr) +- Fix bug in Parquet column index encoding ([#12404](https://github.com/rapidsai/cudf/pull/12404)) [@etseidl](https://github.com/etseidl) +- When building Arrow shared look for a shared OpenSSL ([#12396](https://github.com/rapidsai/cudf/pull/12396)) [@robertmaynard](https://github.com/robertmaynard) +- Fix get_json_object to return empty column on empty input ([#12384](https://github.com/rapidsai/cudf/pull/12384)) [@davidwendt](https://github.com/davidwendt) +- Pin arrow 9 in testing dependencies to prevent conda solve issues ([#12377](https://github.com/rapidsai/cudf/pull/12377)) [@vyasr](https://github.com/vyasr) +- Fix reductions any/all return value for empty input ([#12374](https://github.com/rapidsai/cudf/pull/12374)) [@davidwendt](https://github.com/davidwendt) +- Fix debug compile errors in parquet.hpp ([#12372](https://github.com/rapidsai/cudf/pull/12372)) [@davidwendt](https://github.com/davidwendt) +- Purge non-empty nulls in `cudf::make_lists_column` ([#12370](https://github.com/rapidsai/cudf/pull/12370)) [@ttnghia](https://github.com/ttnghia) +- Use correct memory resource in io::make_column ([#12364](https://github.com/rapidsai/cudf/pull/12364)) [@vyasr](https://github.com/vyasr) +- Add code to detect possible malformed page data in parquet 
files. ([#12360](https://github.com/rapidsai/cudf/pull/12360)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fail loudly to avoid data corruption with unsupported input in `read_orc` ([#12325](https://github.com/rapidsai/cudf/pull/12325)) [@vuule](https://github.com/vuule) +- Fix NumericPairIteratorTest for float values ([#12306](https://github.com/rapidsai/cudf/pull/12306)) [@davidwendt](https://github.com/davidwendt) +- Fixes memory allocation in nested JSON tokenizer ([#12300](https://github.com/rapidsai/cudf/pull/12300)) [@elstehle](https://github.com/elstehle) +- Reconstruct dtypes correctly for list aggs of struct columns ([#12290](https://github.com/rapidsai/cudf/pull/12290)) [@wence-](https://github.com/wence-) +- Fix regex \A and \Z to strictly match string begin/end ([#12282](https://github.com/rapidsai/cudf/pull/12282)) [@davidwendt](https://github.com/davidwendt) +- Fix compile issue in `json_chunked_reader.cpp` ([#12280](https://github.com/rapidsai/cudf/pull/12280)) [@ttnghia](https://github.com/ttnghia) +- Change reductions any/all to return valid values for empty input ([#12279](https://github.com/rapidsai/cudf/pull/12279)) [@davidwendt](https://github.com/davidwendt) +- Only exclude join keys that are indices from key columns ([#12271](https://github.com/rapidsai/cudf/pull/12271)) [@wence-](https://github.com/wence-) +- Fix spill to device limit ([#12252](https://github.com/rapidsai/cudf/pull/12252)) [@madsbk](https://github.com/madsbk) +- Correct behaviour of sort in `concat` for singleton concatenations ([#12247](https://github.com/rapidsai/cudf/pull/12247)) [@wence-](https://github.com/wence-) +- Purge non-empty nulls for `superimpose_nulls` and `push_down_nulls` ([#12239](https://github.com/rapidsai/cudf/pull/12239)) [@ttnghia](https://github.com/ttnghia) 
+- Patch CUB DeviceSegmentedSort and remove workaround ([#12234](https://github.com/rapidsai/cudf/pull/12234)) [@davidwendt](https://github.com/davidwendt) +- Fix memory leak in udf_string::assign(&&) function ([#12206](https://github.com/rapidsai/cudf/pull/12206)) [@davidwendt](https://github.com/davidwendt) +- Workaround thrust-copy-if limit in json get_tree_representation ([#12190](https://github.com/rapidsai/cudf/pull/12190)) [@davidwendt](https://github.com/davidwendt) +- Fix page size calculation in Parquet writer ([#12182](https://github.com/rapidsai/cudf/pull/12182)) [@etseidl](https://github.com/etseidl) +- Add cudf::detail::sizes_to_offsets_iterator to allow checking overflow in offsets ([#12180](https://github.com/rapidsai/cudf/pull/12180)) [@davidwendt](https://github.com/davidwendt) +- Workaround thrust-copy-if limit in wordpiece-tokenizer ([#12168](https://github.com/rapidsai/cudf/pull/12168)) [@davidwendt](https://github.com/davidwendt) +- Floor division uses integer division for integral arguments ([#12131](https://github.com/rapidsai/cudf/pull/12131)) [@wence-](https://github.com/wence-) + +## 📖 Documentation + +- Fix link to NVTX ([#12598](https://github.com/rapidsai/cudf/pull/12598)) [@sameerz](https://github.com/sameerz) +- Include missing groupby functions in documentation ([#12580](https://github.com/rapidsai/cudf/pull/12580)) [@quasiben](https://github.com/quasiben) +- Fix documentation author ([#12527](https://github.com/rapidsai/cudf/pull/12527)) [@bdice](https://github.com/bdice) +- Update libcudf reduction docs for casting output types ([#12526](https://github.com/rapidsai/cudf/pull/12526)) [@davidwendt](https://github.com/davidwendt) +- Add JSON reader page in user guide ([#12499](https://github.com/rapidsai/cudf/pull/12499)) 
[@GregoryKimball](https://github.com/GregoryKimball) +- Link unsupported iteration API docstrings ([#12482](https://github.com/rapidsai/cudf/pull/12482)) [@galipremsagar](https://github.com/galipremsagar) +- `strings_udf` doc update ([#12469](https://github.com/rapidsai/cudf/pull/12469)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Update cudf_assert docs with correct NDEBUG behavior ([#12464](https://github.com/rapidsai/cudf/pull/12464)) [@robertmaynard](https://github.com/robertmaynard) +- Update pre-commit hooks guide ([#12395](https://github.com/rapidsai/cudf/pull/12395)) [@bdice](https://github.com/bdice) +- Update test docs to not use detail comparison utilities ([#12332](https://github.com/rapidsai/cudf/pull/12332)) [@PointKernel](https://github.com/PointKernel) +- Fix doxygen description for regex_program::compute_working_memory_size ([#12329](https://github.com/rapidsai/cudf/pull/12329)) [@davidwendt](https://github.com/davidwendt) +- Add eval to docs. 
([#12322](https://github.com/rapidsai/cudf/pull/12322)) [@vyasr](https://github.com/vyasr) +- Turn on xfail_strict=true ([#12244](https://github.com/rapidsai/cudf/pull/12244)) [@wence-](https://github.com/wence-) +- Update 10 minutes to cuDF ([#12114](https://github.com/rapidsai/cudf/pull/12114)) [@wence-](https://github.com/wence-) + +## 🚀 New Features + +- Use kvikIO as the default IO backend ([#12574](https://github.com/rapidsai/cudf/pull/12574)) [@vuule](https://github.com/vuule) +- Use `has_nonempty_nulls` instead of `may_contain_non_empty_nulls` in `superimpose_nulls` and `push_down_nulls` ([#12560](https://github.com/rapidsai/cudf/pull/12560)) [@ttnghia](https://github.com/ttnghia) +- Add strings methods removeprefix and removesuffix ([#12557](https://github.com/rapidsai/cudf/pull/12557)) [@davidwendt](https://github.com/davidwendt) +- Add `regex_program` java APIs and unit tests ([#12548](https://github.com/rapidsai/cudf/pull/12548)) [@cindyyuanjiang](https://github.com/cindyyuanjiang) +- Default `cudf::io::read_json` to nested JSON parser ([#12544](https://github.com/rapidsai/cudf/pull/12544)) [@vuule](https://github.com/vuule) +- Make string quoting optional on CSV write ([#12539](https://github.com/rapidsai/cudf/pull/12539)) [@mythrocks](https://github.com/mythrocks) +- Use new nvCOMP API to optimize the compression temp memory size ([#12533](https://github.com/rapidsai/cudf/pull/12533)) [@vuule](https://github.com/vuule) +- Support "values" orient (array of arrays) in Nested JSON reader ([#12498](https://github.com/rapidsai/cudf/pull/12498)) [@karthikeyann](https://github.com/karthikeyann) +- `one_hot_encode` to use experimental row comparators ([#12478](https://github.com/rapidsai/cudf/pull/12478)) [@divyegala](https://github.com/divyegala) +- Support %W and %w 
format specifiers in cudf::strings::to_timestamps ([#12475](https://github.com/rapidsai/cudf/pull/12475)) [@davidwendt](https://github.com/davidwendt) +- Add JSON Writer ([#12474](https://github.com/rapidsai/cudf/pull/12474)) [@karthikeyann](https://github.com/karthikeyann) +- Refactor `thrust_copy_if` into `cudf::detail::copy_if_safe` ([#12455](https://github.com/rapidsai/cudf/pull/12455)) [@ttnghia](https://github.com/ttnghia) +- Add trailing comma support for nested JSON reader ([#12448](https://github.com/rapidsai/cudf/pull/12448)) [@karthikeyann](https://github.com/karthikeyann) +- Extract `tokenize_json.hpp` detail header from `src/io/json/nested_json.hpp` ([#12432](https://github.com/rapidsai/cudf/pull/12432)) [@ttnghia](https://github.com/ttnghia) +- JNI bindings to write CSV ([#12425](https://github.com/rapidsai/cudf/pull/12425)) [@mythrocks](https://github.com/mythrocks) +- Nested JSON depth benchmark ([#12371](https://github.com/rapidsai/cudf/pull/12371)) [@karthikeyann](https://github.com/karthikeyann) +- Implement `lists::reverse` ([#12336](https://github.com/rapidsai/cudf/pull/12336)) [@ttnghia](https://github.com/ttnghia) +- Use `device_read` in experimental `read_json` ([#12314](https://github.com/rapidsai/cudf/pull/12314)) [@vuule](https://github.com/vuule) +- Implement JNI for `strings::reverse` ([#12283](https://github.com/rapidsai/cudf/pull/12283)) [@ttnghia](https://github.com/ttnghia) +- Null element for parsing error in numeric types in JSON, CSV reader ([#12272](https://github.com/rapidsai/cudf/pull/12272)) [@karthikeyann](https://github.com/karthikeyann) +- Add cudf::strings:like function with multiple patterns ([#12269](https://github.com/rapidsai/cudf/pull/12269)) [@davidwendt](https://github.com/davidwendt) +- Add environment variable to control 
host memory allocation in `hostdevice_vector` ([#12251](https://github.com/rapidsai/cudf/pull/12251)) [@vuule](https://github.com/vuule) +- Add cudf::strings::reverse function ([#12227](https://github.com/rapidsai/cudf/pull/12227)) [@davidwendt](https://github.com/davidwendt) +- Selectively use dictionary encoding in Parquet writer ([#12211](https://github.com/rapidsai/cudf/pull/12211)) [@etseidl](https://github.com/etseidl) +- Support `replace` in `strings_udf` ([#12207](https://github.com/rapidsai/cudf/pull/12207)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add support to read binary encoded decimals in parquet ([#12205](https://github.com/rapidsai/cudf/pull/12205)) [@PointKernel](https://github.com/PointKernel) +- Support regex EOL where the string ends with a new-line character ([#12181](https://github.com/rapidsai/cudf/pull/12181)) [@davidwendt](https://github.com/davidwendt) +- Updating `stream_compaction/unique` to use new row comparators ([#12159](https://github.com/rapidsai/cudf/pull/12159)) [@divyegala](https://github.com/divyegala) +- Add device buffer datasource ([#12024](https://github.com/rapidsai/cudf/pull/12024)) [@PointKernel](https://github.com/PointKernel) +- Implement groupby apply with JIT ([#11452](https://github.com/rapidsai/cudf/pull/11452)) [@bwyogatama](https://github.com/bwyogatama) + +## 🛠️ Improvements + +- Update shared workflow branches ([#12696](https://github.com/rapidsai/cudf/pull/12696)) [@ajschmidt8](https://github.com/ajschmidt8) +- Pin `dask` and `distributed` for release ([#12695](https://github.com/rapidsai/cudf/pull/12695)) [@galipremsagar](https://github.com/galipremsagar) +- Don't upload `libcudf-example` to Anaconda.org ([#12671](https://github.com/rapidsai/cudf/pull/12671)) 
[@ajschmidt8](https://github.com/ajschmidt8) +- Pin wheel dependencies to same RAPIDS release ([#12659](https://github.com/rapidsai/cudf/pull/12659)) [@sevagh](https://github.com/sevagh) +- Use CTK 118/cp310 branch of wheel workflows ([#12602](https://github.com/rapidsai/cudf/pull/12602)) [@sevagh](https://github.com/sevagh) +- Change ways to access `ptr` in `Buffer` ([#12587](https://github.com/rapidsai/cudf/pull/12587)) [@galipremsagar](https://github.com/galipremsagar) +- Version a parquet writer xfail ([#12579](https://github.com/rapidsai/cudf/pull/12579)) [@galipremsagar](https://github.com/galipremsagar) +- Remove column names ([#12578](https://github.com/rapidsai/cudf/pull/12578)) [@vuule](https://github.com/vuule) +- Parquet reader optimization to address V100 regression. ([#12577](https://github.com/rapidsai/cudf/pull/12577)) [@nvdbaranec](https://github.com/nvdbaranec) +- Add support for `category` dtypes in CSV reader ([#12571](https://github.com/rapidsai/cudf/pull/12571)) [@galipremsagar](https://github.com/galipremsagar) +- Remove `spill_lock` parameter from `SpillableBuffer.get_ptr()` ([#12564](https://github.com/rapidsai/cudf/pull/12564)) [@madsbk](https://github.com/madsbk) +- Optimize `cudf::make_lists_column` ([#12547](https://github.com/rapidsai/cudf/pull/12547)) [@ttnghia](https://github.com/ttnghia) +- Remove `cudf::strings::repeat_strings_output_sizes` from Java and JNI ([#12546](https://github.com/rapidsai/cudf/pull/12546)) [@ttnghia](https://github.com/ttnghia) +- Test that cuInit is not called when RAPIDS_NO_INITIALIZE is set ([#12545](https://github.com/rapidsai/cudf/pull/12545)) [@wence-](https://github.com/wence-) +- Rework repeat_strings to use sizes-to-offsets utility ([#12543](https://github.com/rapidsai/cudf/pull/12543)) 
[@davidwendt](https://github.com/davidwendt) +- Replace exclusive_scan with sizes_to_offsets in cudf::lists::sequences ([#12541](https://github.com/rapidsai/cudf/pull/12541)) [@davidwendt](https://github.com/davidwendt) +- Rework nvtext::ngrams_tokenize to use sizes-to-offsets utility ([#12540](https://github.com/rapidsai/cudf/pull/12540)) [@davidwendt](https://github.com/davidwendt) +- Fix binary-ops gtests coded in namespace cudf::test ([#12536](https://github.com/rapidsai/cudf/pull/12536)) [@davidwendt](https://github.com/davidwendt) +- More `acquire_spill_lock()` and `as_buffer(..., exposed=False)` ([#12535](https://github.com/rapidsai/cudf/pull/12535)) [@madsbk](https://github.com/madsbk) +- Guard CUDA runtime APIs with error checking ([#12531](https://github.com/rapidsai/cudf/pull/12531)) [@PointKernel](https://github.com/PointKernel) +- Update TODOs from issue 10432. ([#12528](https://github.com/rapidsai/cudf/pull/12528)) [@bdice](https://github.com/bdice) +- Update rapids-cmake definitions version in GitHub Actions style checks. ([#12511](https://github.com/rapidsai/cudf/pull/12511)) [@bdice](https://github.com/bdice) +- Switch `engine=cudf` to the new `JSON` reader ([#12509](https://github.com/rapidsai/cudf/pull/12509)) [@galipremsagar](https://github.com/galipremsagar) +- Fix SUM/MEAN aggregation type support.
([#12503](https://github.com/rapidsai/cudf/pull/12503)) [@bdice](https://github.com/bdice) +- Stop using pandas._testing ([#12492](https://github.com/rapidsai/cudf/pull/12492)) [@vyasr](https://github.com/vyasr) +- Fix ROLLING_TEST gtests coded in namespace cudf::test ([#12490](https://github.com/rapidsai/cudf/pull/12490)) [@davidwendt](https://github.com/davidwendt) +- Fix erroneously skipped ORC ZSTD test ([#12486](https://github.com/rapidsai/cudf/pull/12486)) [@vuule](https://github.com/vuule) +- Rework nvtext::generate_character_ngrams to use make_strings_children ([#12480](https://github.com/rapidsai/cudf/pull/12480)) [@davidwendt](https://github.com/davidwendt) +- Raise warnings as errors in the test suite ([#12468](https://github.com/rapidsai/cudf/pull/12468)) [@vyasr](https://github.com/vyasr) +- Remove `int32` hard-coding in python ([#12467](https://github.com/rapidsai/cudf/pull/12467)) [@galipremsagar](https://github.com/galipremsagar) +- Use cudaMemcpyDefault. 
([#12466](https://github.com/rapidsai/cudf/pull/12466)) [@bdice](https://github.com/bdice) +- Update workflows for nightly tests ([#12462](https://github.com/rapidsai/cudf/pull/12462)) [@ajschmidt8](https://github.com/ajschmidt8) +- Build CUDA `11.8` and Python `3.10` Packages ([#12457](https://github.com/rapidsai/cudf/pull/12457)) [@ajschmidt8](https://github.com/ajschmidt8) +- JNI build image default as cuda11.8 ([#12441](https://github.com/rapidsai/cudf/pull/12441)) [@pxLi](https://github.com/pxLi) +- Re-enable `Recently Updated` Check ([#12435](https://github.com/rapidsai/cudf/pull/12435)) [@ajschmidt8](https://github.com/ajschmidt8) +- Rework remaining cudf::strings::from_xyz functions to use make_strings_children ([#12434](https://github.com/rapidsai/cudf/pull/12434)) [@vuule](https://github.com/vuule) +- Build wheels alongside conda CI ([#12427](https://github.com/rapidsai/cudf/pull/12427)) [@sevagh](https://github.com/sevagh) +- Remove arguments for checking exception messages in Python ([#12424](https://github.com/rapidsai/cudf/pull/12424)) [@vyasr](https://github.com/vyasr) +- Clean up cuco usage ([#12421](https://github.com/rapidsai/cudf/pull/12421)) [@PointKernel](https://github.com/PointKernel) +- Fix warnings in remaining modules ([#12406](https://github.com/rapidsai/cudf/pull/12406)) [@vyasr](https://github.com/vyasr) +- Update `ops-bot.yaml` ([#12402](https://github.com/rapidsai/cudf/pull/12402)) [@ajschmidt8](https://github.com/ajschmidt8) +- Rework cudf::strings::integers_to_ipv4 to use make_strings_children utility ([#12401](https://github.com/rapidsai/cudf/pull/12401)) [@davidwendt](https://github.com/davidwendt) +- Use `numpy.empty()` instead of `bytearray` to allocate host memory for spilling ([#12399](https://github.com/rapidsai/cudf/pull/12399)) 
[@madsbk](https://github.com/madsbk) +- Deprecate chunksize from dask_cudf.read_csv ([#12394](https://github.com/rapidsai/cudf/pull/12394)) [@rjzamora](https://github.com/rjzamora) +- Expose the RMM pool size in JNI ([#12390](https://github.com/rapidsai/cudf/pull/12390)) [@revans2](https://github.com/revans2) +- Fix COPYING_TEST: gtests coded in namespace cudf::test ([#12387](https://github.com/rapidsai/cudf/pull/12387)) [@davidwendt](https://github.com/davidwendt) +- Rework cudf::strings::url_encode to use make_strings_children utility ([#12385](https://github.com/rapidsai/cudf/pull/12385)) [@davidwendt](https://github.com/davidwendt) +- Use make_strings_children in parse_data nested json reader ([#12382](https://github.com/rapidsai/cudf/pull/12382)) [@karthikeyann](https://github.com/karthikeyann) +- Fix warnings in test_datetime.py ([#12381](https://github.com/rapidsai/cudf/pull/12381)) [@vyasr](https://github.com/vyasr) +- Mixed Join Benchmarks ([#12375](https://github.com/rapidsai/cudf/pull/12375)) [@divyegala](https://github.com/divyegala) +- Fix warnings in dataframe.py ([#12369](https://github.com/rapidsai/cudf/pull/12369)) [@vyasr](https://github.com/vyasr) +- Update conda recipes. 
([#12368](https://github.com/rapidsai/cudf/pull/12368)) [@bdice](https://github.com/bdice) +- Use gpu-latest-1 runner tag ([#12366](https://github.com/rapidsai/cudf/pull/12366)) [@bdice](https://github.com/bdice) +- Rework cudf::strings::from_booleans to use make_strings_children ([#12365](https://github.com/rapidsai/cudf/pull/12365)) [@vuule](https://github.com/vuule) +- Fix warnings in test modules up to test_dataframe.py ([#12355](https://github.com/rapidsai/cudf/pull/12355)) [@vyasr](https://github.com/vyasr) +- JSON column performance optimization - struct column nulls ([#12354](https://github.com/rapidsai/cudf/pull/12354)) [@karthikeyann](https://github.com/karthikeyann) +- Accelerate stable-segmented-sort with CUB segmented sort ([#12347](https://github.com/rapidsai/cudf/pull/12347)) [@davidwendt](https://github.com/davidwendt) +- Add size check to make_offsets_child_column utility ([#12345](https://github.com/rapidsai/cudf/pull/12345)) [@davidwendt](https://github.com/davidwendt) +- Enable max compression ratio small block optimization for ZSTD ([#12338](https://github.com/rapidsai/cudf/pull/12338)) [@vuule](https://github.com/vuule) +- Fix warnings in test_monotonic.py ([#12334](https://github.com/rapidsai/cudf/pull/12334)) [@vyasr](https://github.com/vyasr) +- Improve JSON column creation performance (list offsets) ([#12330](https://github.com/rapidsai/cudf/pull/12330)) [@karthikeyann](https://github.com/karthikeyann) +- Upgrade to `arrow-10.0.1` ([#12327](https://github.com/rapidsai/cudf/pull/12327)) [@galipremsagar](https://github.com/galipremsagar) +- Fix warnings in test_orc.py ([#12326](https://github.com/rapidsai/cudf/pull/12326)) [@vyasr](https://github.com/vyasr) +- Fix warnings in test_groupby.py 
([#12324](https://github.com/rapidsai/cudf/pull/12324)) [@vyasr](https://github.com/vyasr) +- Fix `test_notebooks.sh` ([#12323](https://github.com/rapidsai/cudf/pull/12323)) [@ajschmidt8](https://github.com/ajschmidt8) +- Fix transform gtests coded in namespace cudf::test ([#12321](https://github.com/rapidsai/cudf/pull/12321)) [@davidwendt](https://github.com/davidwendt) +- Fix `check_style.sh` script ([#12320](https://github.com/rapidsai/cudf/pull/12320)) [@ajschmidt8](https://github.com/ajschmidt8) +- Rework cudf::strings::from_timestamps to use make_strings_children ([#12317](https://github.com/rapidsai/cudf/pull/12317)) [@davidwendt](https://github.com/davidwendt) +- Fix warnings in test_index.py ([#12313](https://github.com/rapidsai/cudf/pull/12313)) [@vyasr](https://github.com/vyasr) +- Fix warnings in test_multiindex.py ([#12310](https://github.com/rapidsai/cudf/pull/12310)) [@vyasr](https://github.com/vyasr) +- CSV, JSON reader to infer integer column with nulls as int64 instead of float64 ([#12309](https://github.com/rapidsai/cudf/pull/12309)) [@karthikeyann](https://github.com/karthikeyann) +- Fix warnings in test_indexing.py ([#12305](https://github.com/rapidsai/cudf/pull/12305)) [@vyasr](https://github.com/vyasr) +- Fix warnings in test_joining.py ([#12304](https://github.com/rapidsai/cudf/pull/12304)) [@vyasr](https://github.com/vyasr) +- Unpin `dask` and `distributed` for development ([#12302](https://github.com/rapidsai/cudf/pull/12302)) [@galipremsagar](https://github.com/galipremsagar) +- Re-enable `sccache` for Jenkins builds ([#12297](https://github.com/rapidsai/cudf/pull/12297)) [@ajschmidt8](https://github.com/ajschmidt8) +- Define needs for pr-builder workflow. 
([#12296](https://github.com/rapidsai/cudf/pull/12296)) [@bdice](https://github.com/bdice) +- Forward merge 22.12 into 23.02 ([#12294](https://github.com/rapidsai/cudf/pull/12294)) [@vyasr](https://github.com/vyasr) +- Fix warnings in test_stats.py ([#12293](https://github.com/rapidsai/cudf/pull/12293)) [@vyasr](https://github.com/vyasr) +- Fix table gtests coded in namespace cudf::test ([#12292](https://github.com/rapidsai/cudf/pull/12292)) [@davidwendt](https://github.com/davidwendt) +- Change cython for regex calls to use cudf::strings::regex_program ([#12289](https://github.com/rapidsai/cudf/pull/12289)) [@davidwendt](https://github.com/davidwendt) +- Improved error reporting when reading multiple JSON files ([#12285](https://github.com/rapidsai/cudf/pull/12285)) [@vuule](https://github.com/vuule) +- Deprecate Frame.sum_of_squares ([#12284](https://github.com/rapidsai/cudf/pull/12284)) [@vyasr](https://github.com/vyasr) +- Remove deprecated code for 23.02 ([#12281](https://github.com/rapidsai/cudf/pull/12281)) [@vyasr](https://github.com/vyasr) +- Clean up handling of max_page_size_bytes in Parquet writer ([#12277](https://github.com/rapidsai/cudf/pull/12277)) [@etseidl](https://github.com/etseidl) +- Fix replace gtests coded in namespace cudf::test ([#12270](https://github.com/rapidsai/cudf/pull/12270)) [@davidwendt](https://github.com/davidwendt) +- Add pandas nullable type support in `Index.to_pandas` ([#12268](https://github.com/rapidsai/cudf/pull/12268)) [@galipremsagar](https://github.com/galipremsagar) +- Rework nvtext::detokenize to use indexalator for row indices ([#12267](https://github.com/rapidsai/cudf/pull/12267)) [@davidwendt](https://github.com/davidwendt) +- Fix reduction gtests coded in namespace cudf::test 
([#12257](https://github.com/rapidsai/cudf/pull/12257)) [@davidwendt](https://github.com/davidwendt) +- Remove default parameters from cudf::detail::sort function declarations ([#12254](https://github.com/rapidsai/cudf/pull/12254)) [@davidwendt](https://github.com/davidwendt) +- Add `duplicated` support for `Series`, `DataFrame` and `Index` ([#12246](https://github.com/rapidsai/cudf/pull/12246)) [@galipremsagar](https://github.com/galipremsagar) +- Replace column/table test utilities with macros ([#12242](https://github.com/rapidsai/cudf/pull/12242)) [@PointKernel](https://github.com/PointKernel) +- Rework cudf::strings::pad and zfill to use make_strings_children ([#12238](https://github.com/rapidsai/cudf/pull/12238)) [@davidwendt](https://github.com/davidwendt) +- Fix sort gtests coded in namespace cudf::test ([#12237](https://github.com/rapidsai/cudf/pull/12237)) [@davidwendt](https://github.com/davidwendt) +- Wrapping concat and file writes in `acquire_spill_lock()` ([#12232](https://github.com/rapidsai/cudf/pull/12232)) [@madsbk](https://github.com/madsbk) +- Rename `cudf::structs::detail::superimpose_parent_nulls` APIs ([#12230](https://github.com/rapidsai/cudf/pull/12230)) [@ttnghia](https://github.com/ttnghia) +- Cover parsing to decimal types in `read_json` tests ([#12229](https://github.com/rapidsai/cudf/pull/12229)) [@vuule](https://github.com/vuule) +- Spill Statistics ([#12223](https://github.com/rapidsai/cudf/pull/12223)) [@madsbk](https://github.com/madsbk) +- Use CUDF_JNI_ENABLE_PROFILING to conditionally enable profiling support.
([#12221](https://github.com/rapidsai/cudf/pull/12221)) [@bdice](https://github.com/bdice) +- Clean up of `test_spilling.py` ([#12220](https://github.com/rapidsai/cudf/pull/12220)) [@madsbk](https://github.com/madsbk) +- Simplify repetitive boolean logic ([#12218](https://github.com/rapidsai/cudf/pull/12218)) [@vuule](https://github.com/vuule) +- Add `Series.hasnans` and `Index.hasnans` ([#12214](https://github.com/rapidsai/cudf/pull/12214)) [@galipremsagar](https://github.com/galipremsagar) +- Add cudf::strings:udf::replace function ([#12210](https://github.com/rapidsai/cudf/pull/12210)) [@davidwendt](https://github.com/davidwendt) +- Adds in new java APIs for appending byte arrays to host columnar data ([#12208](https://github.com/rapidsai/cudf/pull/12208)) [@revans2](https://github.com/revans2) +- Remove Python dependencies from Java CI. ([#12193](https://github.com/rapidsai/cudf/pull/12193)) [@bdice](https://github.com/bdice) +- Fix null order in sort-based groupby and improve groupby tests ([#12191](https://github.com/rapidsai/cudf/pull/12191)) [@divyegala](https://github.com/divyegala) +- Move strings children functions from cudf/strings/detail/utilities.cuh to new header ([#12185](https://github.com/rapidsai/cudf/pull/12185)) [@davidwendt](https://github.com/davidwendt) +- Clean up existing JNI scalar to column code ([#12173](https://github.com/rapidsai/cudf/pull/12173)) [@revans2](https://github.com/revans2) +- Remove JIT type names, refactor id_to_type. 
([#12158](https://github.com/rapidsai/cudf/pull/12158)) [@bdice](https://github.com/bdice) +- Update JNI version to 23.02.0-SNAPSHOT ([#12129](https://github.com/rapidsai/cudf/pull/12129)) [@pxLi](https://github.com/pxLi) +- Minor refactor of cpp/src/io/parquet/page_data.cu ([#12126](https://github.com/rapidsai/cudf/pull/12126)) [@etseidl](https://github.com/etseidl) +- Add codespell as a linter ([#12097](https://github.com/rapidsai/cudf/pull/12097)) [@benfred](https://github.com/benfred) +- Enable specifying exceptions in error macros ([#12078](https://github.com/rapidsai/cudf/pull/12078)) [@vyasr](https://github.com/vyasr) +- Move `_label_encoding` from Series to Column ([#12040](https://github.com/rapidsai/cudf/pull/12040)) [@shwina](https://github.com/shwina) +- Add GitHub Actions Workflows ([#12002](https://github.com/rapidsai/cudf/pull/12002)) [@ajschmidt8](https://github.com/ajschmidt8) +- Consolidate dask-cudf `groupby_agg` calls in one place ([#10835](https://github.com/rapidsai/cudf/pull/10835)) [@charlesbluca](https://github.com/charlesbluca) # cuDF 22.12.00 (8 Dec 2022) From ac60656bc929c08f522d23328a515caa52cee989 Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Thu, 9 Feb 2023 11:13:21 -0500 Subject: [PATCH 18/24] Fix update-version.sh (#12745) Fix the `update-version.sh` script by removing a file that no longer exists Authors: - Ray Douglass (https://github.com/raydouglass) Approvers: - Sevag H (https://github.com/sevagh) --- ci/release/update-version.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 555a67d9cd6..96099b0512d 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -43,9 +43,6 @@ sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' 
python/ # Strings UDF update sed_runner 's/'"strings_udf_version .*)"'/'"strings_udf_version ${NEXT_FULL_TAG})"'/g' python/strings_udf/CMakeLists.txt -# Groupby UDF update -sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' python/cudf/udf_cpp/CMakeLists.txt - # cpp libcudf_kafka update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt From 0cab19aca4c65453b9d1715b34999dbfc5d0a162 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 9 Feb 2023 16:52:12 -0500 Subject: [PATCH 19/24] Reduce the number of test cases in multibyte_split benchmark (#12737) Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12737 --- cpp/benchmarks/io/text/multibyte_split.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index c3b7c585055..261243d29fb 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -209,10 +209,21 @@ using source_type_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_TYPE_AXES(source_type_list)) - .set_name("multibyte_split") +NVBENCH_BENCH_TYPES(bench_multibyte_split, + NVBENCH_TYPE_AXES(nvbench::enum_type_list)) + .set_name("multibyte_split_delimiters") + .set_min_samples(4) .add_int64_axis("strip_delimiters", {0, 1}) .add_int64_axis("delim_size", {1, 4, 7}) .add_int64_axis("delim_percent", {1, 25}) + .add_int64_power_of_two_axis("size_approx", {15}) + .add_int64_axis("byte_range_percent", {50}); + +NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_TYPE_AXES(source_type_list)) + .set_name("multibyte_split_source") + .set_min_samples(4) + .add_int64_axis("strip_delimiters", {1}) + 
.add_int64_axis("delim_size", {1}) + .add_int64_axis("delim_percent", {1}) .add_int64_power_of_two_axis("size_approx", {15, 30}) - .add_int64_axis("byte_range_percent", {1, 5, 25, 50, 100}); + .add_int64_axis("byte_range_percent", {10, 100}); From c931d5abd8b32fa9106cf6dd004ae0fdfde466b9 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 10 Feb 2023 11:48:08 -0500 Subject: [PATCH 20/24] Update default data source in cuio reader benchmarks (#12740) Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12740 --- cpp/benchmarks/io/csv/csv_reader_input.cpp | 21 +++++++++++-------- cpp/benchmarks/io/orc/orc_reader_input.cpp | 19 ++++++++++------- .../io/parquet/parquet_reader_input.cpp | 21 +++++++++++-------- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp index 27fea856332..a68f689e4db 100644 --- a/cpp/benchmarks/io/csv/csv_reader_input.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -64,19 +64,20 @@ void csv_read_common(DataType const& data_types, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template -void BM_csv_read_input(nvbench::state& state, nvbench::type_list>) +template +void BM_csv_read_input(nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; auto const d_type = get_type_or_group(static_cast(DataType)); - auto const source_type = io_type::FILEPATH; + auto const source_type = IOType; csv_read_common(d_type, source_type, state); } -template -void BM_csv_read_io(nvbench::state& state, nvbench::type_list>) +template +void BM_csv_read_io(nvbench::state& state, nvbench::type_list>) { cudf::rmm_pool_raii rmm_pool; @@ -86,7 +87,7 @@ void BM_csv_read_io(nvbench::state& state, nvbench::type_list(data_type::TIMESTAMP), static_cast(data_type::DURATION), static_cast(data_type::STRING)}); - auto const source_type = IO; + auto const source_type = IOType; csv_read_common(d_type, source_type, state); } @@ -101,9 +102,11 @@ using d_type_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(BM_csv_read_input, NVBENCH_TYPE_AXES(d_type_list)) +NVBENCH_BENCH_TYPES(BM_csv_read_input, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) .set_name("csv_read_data_type") - .set_type_axes_names({"data_type"}) + .set_type_axes_names({"data_type", "io"}) .set_min_samples(4); NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list)) diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index 3f8c096140e..a57a12debc6 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -57,8 +57,9 @@ void orc_read_common(cudf::io::orc_writer_options const& opts, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template -void BM_orc_read_data(nvbench::state& state, nvbench::type_list>) +template +void BM_orc_read_data(nvbench::state& state, + 
nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; @@ -72,17 +73,17 @@ void BM_orc_read_data(nvbench::state& state, nvbench::type_listview(); - cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); + cuio_source_sink_pair source_sink(IOType); cudf::io::orc_writer_options opts = cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view); orc_read_common(opts, source_sink, state); } -template +template void BM_orc_read_io_compression( nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; @@ -103,7 +104,7 @@ void BM_orc_read_io_compression( data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); auto const view = tbl->view(); - cuio_source_sink_pair source_sink(IO); + cuio_source_sink_pair source_sink(IOType); cudf::io::orc_writer_options opts = cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view) .compression(Compression); @@ -126,9 +127,11 @@ using io_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(BM_orc_read_data, NVBENCH_TYPE_AXES(d_type_list)) +NVBENCH_BENCH_TYPES(BM_orc_read_data, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) .set_name("orc_read_decode") - .set_type_axes_names({"data_type"}) + .set_type_axes_names({"data_type", "io"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 36a62903f31..fba69cb2b0f 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -57,8 +57,10 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -template -void BM_parquet_read_data(nvbench::state& state, nvbench::type_list>) +template +void 
BM_parquet_read_data( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; @@ -66,7 +68,6 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_listview(); - cuio_source_sink_pair source_sink(source_type); + cuio_source_sink_pair source_sink(IOType); cudf::io::parquet_writer_options write_opts = cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) .compression(compression); @@ -82,10 +83,10 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_list +template void BM_parquet_read_io_compression( nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) + nvbench::type_list, nvbench::enum_type>) { cudf::rmm_pool_raii rmm_pool; @@ -101,7 +102,7 @@ void BM_parquet_read_io_compression( cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); auto const compression = Compression; - auto const source_type = IO; + auto const source_type = IOType; auto const tbl = create_random_table(cycle_dtypes(d_type, num_cols), @@ -133,9 +134,11 @@ using io_list = nvbench::enum_type_list; -NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list)) +NVBENCH_BENCH_TYPES(BM_parquet_read_data, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) .set_name("parquet_read_decode") - .set_type_axes_names({"data_type"}) + .set_type_axes_names({"data_type", "io"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); From 048f9368d95189cf398b0389702c84891ade3cb1 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 10 Feb 2023 11:29:49 -0600 Subject: [PATCH 21/24] Unpin `dask` and `distributed` for development (#12710) This PR unpins `dask` and `distributed` for `23.04` development. 
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/12710 --- ci/benchmark/build.sh | 2 +- ci/cpu/build.sh | 2 +- ci/gpu/build.sh | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 4 ++-- conda/recipes/custreamz/meta.yaml | 4 ++-- conda/recipes/dask-cudf/meta.yaml | 8 ++++---- conda/recipes/dask-cudf/run_test.sh | 2 +- dependencies.yaml | 4 ++-- python/dask_cudf/setup.py | 4 ++-- 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index c27fe23d078..e221424d1cd 100755 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/" export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" # Dask & Distributed option to install main(nightly) or `conda-forge` packages. -export INSTALL_DASK_MAIN=0 +export INSTALL_DASK_MAIN=1 # Dask version to install when `INSTALL_DASK_MAIN=0` export DASK_STABLE_VERSION="2023.1.1" diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 5b4a201e5e9..3aa00ff7de9 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -35,7 +35,7 @@ export CONDA_BLD_DIR="$WORKSPACE/.conda-bld" # Whether to keep `dask/label/dev` channel in the env. If INSTALL_DASK_MAIN=0, # `dask/label/dev` channel is removed. -export INSTALL_DASK_MAIN=0 +export INSTALL_DASK_MAIN=1 # Switch to project root; also root of repo checkout cd "$WORKSPACE" diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 0e790ba05ec..ff40fdf6c9f 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -39,7 +39,7 @@ export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` unset GIT_DESCRIBE_TAG # Dask & Distributed option to install main(nightly) or `conda-forge` packages. 
-export INSTALL_DASK_MAIN=0 +export INSTALL_DASK_MAIN=1 # Dask version to install when `INSTALL_DASK_MAIN=0` export DASK_STABLE_VERSION="2023.1.1" diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 4f62e48a6f1..675df3891c3 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -21,8 +21,8 @@ dependencies: - cxx-compiler - cython>=0.29,<0.30 - dask-cuda=23.04.* -- dask==2023.1.1 -- distributed==2023.1.1 +- dask>=2023.1.1 +- distributed>=2023.1.1 - dlpack>=0.5,<0.6.0a0 - doxygen=1.8.20 - fastavro>=0.22.9 diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 24f53289754..af5705341e6 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -39,8 +39,8 @@ requirements: - python - streamz - cudf ={{ version }} - - dask ==2023.1.1 - - distributed ==2023.1.1 + - dask >=2023.1.1 + - distributed >=2023.1.1 - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf_kafka ={{ version }} diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index bc1c4783361..3ee3d4d3952 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -35,14 +35,14 @@ requirements: host: - python - cudf ={{ version }} - - dask ==2023.1.1 - - distributed ==2023.1.1 + - dask >=2023.1.1 + - distributed >=2023.1.1 - cudatoolkit ={{ cuda_version }} run: - python - cudf ={{ version }} - - dask ==2023.1.1 - - distributed ==2023.1.1 + - dask >=2023.1.1 + - distributed >=2023.1.1 - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} test: diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh index 3b1fc46c4f4..78be90757a2 100644 --- a/conda/recipes/dask-cudf/run_test.sh +++ b/conda/recipes/dask-cudf/run_test.sh @@ -18,7 +18,7 @@ if [ "${ARCH}" = "aarch64" ]; then fi # Dask & Distributed option to install main(nightly) or 
`conda-forge` packages. -export INSTALL_DASK_MAIN=0 +export INSTALL_DASK_MAIN=1 # Dask version to install when `INSTALL_DASK_MAIN=0` export DASK_STABLE_VERSION="2023.1.1" diff --git a/dependencies.yaml b/dependencies.yaml index 0a3a2ce7828..ae8eac4ea30 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -226,8 +226,8 @@ dependencies: - output_types: [conda, requirements] packages: - cachetools - - dask==2023.1.1 - - distributed==2023.1.1 + - dask>=2023.1.1 + - distributed>=2023.1.1 - fsspec>=0.6.0 - numba>=0.56.2 - numpy diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index be4c704019d..04145d23978 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -8,8 +8,8 @@ cuda_suffix = os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default="") install_requires = [ - "dask==2023.1.1", - "distributed==2023.1.1", + "dask>=2023.1.1", + "distributed>=2023.1.1", "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.6.0dev0", From c4a1389bca6f2fd521bd5e768eda7407aa3e66b5 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Fri, 10 Feb 2023 11:36:23 -0600 Subject: [PATCH 22/24] Fix `Series` comparison vs scalars (#12519) Fixes an issue where this happens: ```python import cudf cudf.Series(['a','b','c']) == 1 ``` ``` File "/raid/brmiller/anaconda/envs/cudf_dev/lib/python3.9/site-packages/cudf/core/mixins/mixin_factory.py", line 11, in wrapper return method(self, *args1, *args2, **kwargs1, **kwargs2) File "/raid/brmiller/anaconda/envs/cudf_dev/lib/python3.9/site-packages/cudf/core/indexed_frame.py", line 3278, in _binaryop ColumnAccessor(type(self)._colwise_binop(operands, op)), File "/raid/brmiller/anaconda/envs/cudf_dev/lib/python3.9/site-packages/cudf/core/column_accessor.py", line 124, in __init__ column_length = len(data[next(iter(data))]) TypeError: object of type 'bool' has no len() ``` It turns out this happens because `StringColumn`'s `normalize_binop_value` method returns `NotImplemented` for 
scalars that are not of dtype `object`. This eventually causes python to dispatch to the python scalar class' `__eq__` which returns the scalar `False` when encountering a cuDF object. cuDF expects a column object at this point but has a scalar. This in turn causes cuDF to try and construct a `ColumnAccessor` around a dict that looks like `{'name', False}` ultimately throwing the error. This PR proposes to earlystop this behavior according to the rules for comparing python string scalars with other objects: - Always return `False` for `__eq__` even if the character in the string is equivalent to whatever is being compared - Always return `True` for `__ne__` ditto above. - Copy the input mask This should align us with pandas behavior for this case: ```python >>> pd.Series(['a','b', 'c'], dtype='string') == 1 0 False 1 False 2 False dtype: boolean >>> pd.Series(['a','b', 'c'], dtype='string') != 1 0 True 1 True 2 True dtype: boolean ``` EDIT: Updating this PR to handle a similar issue resulting in the same error when comparing datetime series to strings that contain valid datetimes, such as `20110101`. Authors: - https://github.com/brandon-b-miller Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/12519 --- python/cudf/cudf/core/column/datetime.py | 7 +- python/cudf/cudf/core/column/string.py | 13 +++- python/cudf/cudf/tests/test_binops.py | 81 ++++++++++++++++++------ python/cudf/cudf/tests/test_datetime.py | 26 ++++++++ 4 files changed, 104 insertions(+), 23 deletions(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 56436ac141d..0c546168fe3 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. 
from __future__ import annotations @@ -261,6 +261,11 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: return cudf.Scalar(None, dtype=other.dtype) return cudf.Scalar(other) + elif isinstance(other, str): + try: + return cudf.Scalar(other, dtype=self.dtype) + except ValueError: + pass return NotImplemented diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 9c30585a541..ce8bc3da08b 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5665,7 +5665,7 @@ def normalize_binop_value( and other.dtype == "object" ): return other - if isinstance(other, str): + if is_scalar(other): return cudf.Scalar(other) return NotImplemented @@ -5701,6 +5701,17 @@ def _binaryop( return NotImplemented if isinstance(other, (StringColumn, str, cudf.Scalar)): + if isinstance(other, cudf.Scalar) and other.dtype != "O": + if op in { + "__eq__", + "__ne__", + }: + return column.full( + len(self), op == "__ne__", dtype="bool" + ).set_mask(self.mask) + else: + return NotImplemented + if op == "__add__": if isinstance(other, cudf.Scalar): other = cast( diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index e5ade1326c9..7d01f89eada 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. 
import decimal import operator @@ -320,29 +320,68 @@ def test_series_compare_nulls(cmpop, dtypes): utils.assert_eq(expect, got) -@pytest.mark.parametrize( - "obj", [pd.Series(["a", "b", None, "d", "e", None], dtype="string"), "a"] -) -@pytest.mark.parametrize("cmpop", _cmpops) -@pytest.mark.parametrize( - "cmp_obj", - [pd.Series(["b", "a", None, "d", "f", None], dtype="string"), "a"], -) -def test_string_series_compare(obj, cmpop, cmp_obj): +@pytest.fixture +def str_series_cmp_data(): + return pd.Series(["a", "b", None, "d", "e", None], dtype="string") - g_obj = obj - if isinstance(g_obj, pd.Series): - g_obj = Series.from_pandas(g_obj) - g_cmp_obj = cmp_obj - if isinstance(g_cmp_obj, pd.Series): - g_cmp_obj = Series.from_pandas(g_cmp_obj) - got = cmpop(g_obj, g_cmp_obj) - expected = cmpop(obj, cmp_obj) - if isinstance(expected, pd.Series): - expected = cudf.from_pandas(expected) +@pytest.fixture(ids=[op.__name__ for op in _cmpops], params=_cmpops) +def str_series_compare_str_cmpop(request): + return request.param - utils.assert_eq(expected, got) + +@pytest.fixture(ids=["eq", "ne"], params=[operator.eq, operator.ne]) +def str_series_compare_num_cmpop(request): + return request.param + + +@pytest.fixture(ids=["int", "float", "bool"], params=[1, 1.5, True]) +def cmp_scalar(request): + return request.param + + +def test_str_series_compare_str( + str_series_cmp_data, str_series_compare_str_cmpop +): + expect = str_series_compare_str_cmpop(str_series_cmp_data, "a") + got = str_series_compare_str_cmpop( + Series.from_pandas(str_series_cmp_data), "a" + ) + + utils.assert_eq(expect, got.to_pandas(nullable=True)) + + +def test_str_series_compare_str_reflected( + str_series_cmp_data, str_series_compare_str_cmpop +): + expect = str_series_compare_str_cmpop("a", str_series_cmp_data) + got = str_series_compare_str_cmpop( + "a", Series.from_pandas(str_series_cmp_data) + ) + + utils.assert_eq(expect, got.to_pandas(nullable=True)) + + +def test_str_series_compare_num( + 
str_series_cmp_data, str_series_compare_num_cmpop, cmp_scalar +): + expect = str_series_compare_num_cmpop(str_series_cmp_data, cmp_scalar) + got = str_series_compare_num_cmpop( + Series.from_pandas(str_series_cmp_data), cmp_scalar + ) + + utils.assert_eq(expect, got.to_pandas(nullable=True)) + + +def test_str_series_compare_num_reflected( + str_series_cmp_data, str_series_compare_num_cmpop, cmp_scalar +): + expect = str_series_compare_num_cmpop(cmp_scalar, str_series_cmp_data) + got = str_series_compare_num_cmpop( + cmp_scalar, Series.from_pandas(str_series_cmp_data) + ) + + utils.assert_eq(expect, got.to_pandas(nullable=True)) @pytest.mark.parametrize("obj_class", ["Series", "Index"]) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 5616cea42ba..1211938ff10 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -22,6 +22,15 @@ expect_warning_if, ) +_cmpops = [ + operator.lt, + operator.gt, + operator.le, + operator.ge, + operator.eq, + operator.ne, +] + def data1(): return pd.date_range("20010101", "20020215", freq="400h", name="times") @@ -986,6 +995,23 @@ def test_datetime_series_ops_with_scalars(data, other_scalars, dtype, op): ) +@pytest.mark.parametrize("data", ["20110101", "20120101", "20130101"]) +@pytest.mark.parametrize("other_scalars", ["20110101", "20120101", "20130101"]) +@pytest.mark.parametrize("op", _cmpops) +@pytest.mark.parametrize( + "dtype", + ["datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]"], +) +def test_datetime_series_cmpops_with_scalars(data, other_scalars, dtype, op): + gsr = cudf.Series(data=data, dtype=dtype) + psr = gsr.to_pandas() + + expect = op(psr, other_scalars) + got = op(gsr, other_scalars) + + assert_eq(expect, got) + + @pytest.mark.parametrize( "data", [ From 2d7e79a7ae4f40fdb278798d02b79480aaef1adf Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 10 Feb 
2023 15:37:30 -0500 Subject: [PATCH 23/24] Add nvbench environment class for initializing RMM in benchmarks (#12728) Adds an environment class to initialize the RMM pool memory resource manager before running benchmarks through nvbench. This removes the need to initialize RMM on every benchmark call which improves benchmark run performance and fixes some GPU metrics gathering when run under nsys. This requires a patch to the nvbench source to enable this feature and is part of the following pull request: https://github.com/NVIDIA/nvbench/pull/123 The patch can be removed once the PR is merged and source available to the libcudf build. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Divye Gala (https://github.com/divyegala) - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) - Robert Maynard (https://github.com/robertmaynard) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/12728 --- cpp/CMakeLists.txt | 9 ++--- cpp/benchmarks/fixture/rmm_pool_raii.hpp | 13 +++++++- cpp/benchmarks/groupby/group_max.cpp | 3 +- cpp/benchmarks/groupby/group_nunique.cpp | 3 +- cpp/benchmarks/groupby/group_rank.cpp | 3 +- cpp/benchmarks/groupby/group_struct_keys.cpp | 4 +-- cpp/benchmarks/io/csv/csv_reader_input.cpp | 4 --- cpp/benchmarks/io/csv/csv_reader_options.cpp | 4 +-- cpp/benchmarks/io/fst.cu | 14 +------- cpp/benchmarks/io/json/nested_json.cpp | 6 ---- cpp/benchmarks/io/orc/orc_reader_input.cpp | 4 --- cpp/benchmarks/io/orc/orc_reader_options.cpp | 4 +-- cpp/benchmarks/io/orc/orc_writer.cpp | 8 +---- cpp/benchmarks/io/orc/orc_writer_chunks.cpp | 6 +--- .../io/parquet/parquet_reader_input.cpp | 4 --- .../io/parquet/parquet_reader_options.cpp | 2 -- cpp/benchmarks/io/parquet/parquet_writer.cpp | 6 +--- .../io/parquet/parquet_writer_chunks.cpp | 6 +--- cpp/benchmarks/io/text/multibyte_split.cpp | 2 -- 
cpp/benchmarks/join/join.cu | 9 ----- cpp/benchmarks/join/mixed_join.cu | 15 --------- cpp/benchmarks/reduction/distinct_count.cpp | 4 +-- cpp/benchmarks/reduction/rank.cpp | 4 +-- cpp/benchmarks/reduction/scan_structs.cpp | 4 +-- cpp/benchmarks/reduction/segment_reduce.cu | 5 +-- cpp/benchmarks/search/contains.cpp | 3 +- cpp/benchmarks/sort/rank_lists.cpp | 2 -- cpp/benchmarks/sort/rank_structs.cpp | 2 -- cpp/benchmarks/sort/segmented_sort.cpp | 4 +-- cpp/benchmarks/sort/sort_lists.cpp | 2 -- cpp/benchmarks/sort/sort_structs.cpp | 2 -- cpp/benchmarks/stream_compaction/distinct.cpp | 6 +--- cpp/benchmarks/stream_compaction/unique.cpp | 4 --- cpp/benchmarks/string/like.cpp | 3 +- cpp/benchmarks/string/reverse.cpp | 3 +- cpp/cmake/thirdparty/get_nvbench.cmake | 33 +++++++++++++++++++ .../patches/nvbench_global_setup.diff | 27 +++++++++++++++ .../thirdparty/patches/nvbench_override.json | 14 ++++++++ 38 files changed, 109 insertions(+), 142 deletions(-) create mode 100644 cpp/cmake/thirdparty/get_nvbench.cmake create mode 100644 cpp/cmake/thirdparty/patches/nvbench_global_setup.diff create mode 100644 cpp/cmake/thirdparty/patches/nvbench_override.json diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 19c118016bf..a635e655c39 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -784,12 +784,9 @@ if(CUDF_BUILD_BENCHMARKS) include(${rapids-cmake-dir}/cpm/gbench.cmake) rapids_cpm_gbench() - # Find or install NVBench Temporarily force downloading of fmt because current versions of nvbench - # do not support the latest version of fmt, which is automatically pulled into our conda - # environments by mamba. 
- set(CPM_DOWNLOAD_fmt TRUE) - include(${rapids-cmake-dir}/cpm/nvbench.cmake) - rapids_cpm_nvbench() + # Find or install nvbench + include(cmake/thirdparty/get_nvbench.cmake) + add_subdirectory(benchmarks) endif() diff --git a/cpp/benchmarks/fixture/rmm_pool_raii.hpp b/cpp/benchmarks/fixture/rmm_pool_raii.hpp index 60586ef878b..465c53a91ea 100644 --- a/cpp/benchmarks/fixture/rmm_pool_raii.hpp +++ b/cpp/benchmarks/fixture/rmm_pool_raii.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,4 +69,15 @@ class rmm_pool_raii { std::shared_ptr mr; }; +/** + * Base fixture for cudf benchmarks using nvbench. + * + * Initializes the default memory resource to use the RMM pool device resource. + */ +struct nvbench_base_fixture { + rmm_pool_raii _mr; +}; + } // namespace cudf + +#define NVBENCH_ENVIRONMENT cudf::nvbench_base_fixture diff --git a/cpp/benchmarks/groupby/group_max.cpp b/cpp/benchmarks/groupby/group_max.cpp index 4956cce0daf..077558f8709 100644 --- a/cpp/benchmarks/groupby/group_max.cpp +++ b/cpp/benchmarks/groupby/group_max.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,7 +24,6 @@ template void bench_groupby_max(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; const auto size = static_cast(state.get_int64("num_rows")); auto const keys = [&] { diff --git a/cpp/benchmarks/groupby/group_nunique.cpp b/cpp/benchmarks/groupby/group_nunique.cpp index 05698c04058..f74ed95200e 100644 --- a/cpp/benchmarks/groupby/group_nunique.cpp +++ b/cpp/benchmarks/groupby/group_nunique.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,6 @@ auto make_aggregation_request_vector(cudf::column_view const& values, Args&&... template void bench_groupby_nunique(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; const auto size = static_cast(state.get_int64("num_rows")); auto const keys = [&] { diff --git a/cpp/benchmarks/groupby/group_rank.cpp b/cpp/benchmarks/groupby/group_rank.cpp index f573b63a75d..2a70b95890b 100644 --- a/cpp/benchmarks/groupby/group_rank.cpp +++ b/cpp/benchmarks/groupby/group_rank.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,7 +30,6 @@ static void nvbench_groupby_rank(nvbench::state& state, { using namespace cudf; constexpr auto dtype = type_to_id(); - cudf::rmm_pool_raii pool_raii; bool const is_sorted = state.get_int64("is_sorted"); cudf::size_type const column_size = state.get_int64("data_size"); diff --git a/cpp/benchmarks/groupby/group_struct_keys.cpp b/cpp/benchmarks/groupby/group_struct_keys.cpp index cc6f0faaf41..53ef12ffeaa 100644 --- a/cpp/benchmarks/groupby/group_struct_keys.cpp +++ b/cpp/benchmarks/groupby/group_struct_keys.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,8 +29,6 @@ void bench_groupby_struct_keys(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; - using Type = int; using column_wrapper = cudf::test::fixed_width_column_wrapper; std::default_random_engine generator; diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp index a68f689e4db..026045acee7 100644 --- a/cpp/benchmarks/io/csv/csv_reader_input.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp @@ -68,8 +68,6 @@ template void BM_csv_read_input(nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group(static_cast(DataType)); auto const source_type = IOType; @@ -79,8 +77,6 @@ void BM_csv_read_input(nvbench::state& state, template void BM_csv_read_io(nvbench::state& state, nvbench::type_list>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), diff --git a/cpp/benchmarks/io/csv/csv_reader_options.cpp b/cpp/benchmarks/io/csv/csv_reader_options.cpp index 04522c16d5c..2d0e0e5754e 100644 --- a/cpp/benchmarks/io/csv/csv_reader_options.cpp +++ 
b/cpp/benchmarks/io/csv/csv_reader_options.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,8 +32,6 @@ void BM_csv_read_varying_options( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const data_types = dtypes_for_column_selection(get_type_or_group({static_cast(data_type::INTEGRAL), static_cast(data_type::FLOAT), diff --git a/cpp/benchmarks/io/fst.cu b/cpp/benchmarks/io/fst.cu index 6d318db12de..7acf69e9d8e 100644 --- a/cpp/benchmarks/io/fst.cu +++ b/cpp/benchmarks/io/fst.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,9 +78,6 @@ constexpr std::size_t single_item = 1; void BM_FST_JSON(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); auto const string_size{size_type(state.get_int64("string_size"))}; @@ -116,9 +113,6 @@ void BM_FST_JSON(nvbench::state& state) void BM_FST_JSON_no_outidx(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); auto const string_size{size_type(state.get_int64("string_size"))}; @@ -154,9 +148,6 @@ void BM_FST_JSON_no_outidx(nvbench::state& state) void BM_FST_JSON_no_out(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready 
- cudf::rmm_pool_raii rmm_pool; - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); auto const string_size{size_type(state.get_int64("string_size"))}; @@ -190,9 +181,6 @@ void BM_FST_JSON_no_out(nvbench::state& state) void BM_FST_JSON_no_str(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), "Benchmarks only support up to size_type's maximum number of items"); auto const string_size{size_type(state.get_int64("string_size"))}; diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp index 2abae88dca3..416cf403671 100644 --- a/cpp/benchmarks/io/json/nested_json.cpp +++ b/cpp/benchmarks/io/json/nested_json.cpp @@ -157,9 +157,6 @@ auto make_test_json_data(cudf::size_type string_size, rmm::cuda_stream_view stre void BM_NESTED_JSON(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - auto const string_size{cudf::size_type(state.get_int64("string_size"))}; auto const default_options = cudf::io::json_reader_options{}; @@ -189,9 +186,6 @@ NVBENCH_BENCH(BM_NESTED_JSON) void BM_NESTED_JSON_DEPTH(nvbench::state& state) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - auto const string_size{cudf::size_type(state.get_int64("string_size"))}; auto const depth{cudf::size_type(state.get_int64("depth"))}; diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp index a57a12debc6..4705c083c02 100644 --- a/cpp/benchmarks/io/orc/orc_reader_input.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -61,8 +61,6 @@ template void BM_orc_read_data(nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type 
= get_type_or_group(static_cast(DataType)); cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); @@ -85,8 +83,6 @@ void BM_orc_read_io_compression( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), diff --git a/cpp/benchmarks/io/orc/orc_reader_options.cpp b/cpp/benchmarks/io/orc/orc_reader_options.cpp index 1b7d33ccd19..1e841f744ae 100644 --- a/cpp/benchmarks/io/orc/orc_reader_options.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_options.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,8 +56,6 @@ void BM_orc_read_varying_options(nvbench::state& state, nvbench::enum_type, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const num_chunks = RowSelection == row_selection::ALL ? 1 : chunked_read_num_chunks; auto const use_index = UsesIndex == uses_index::YES; diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index 545f8d10122..67bf4cb750b 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -46,8 +46,6 @@ constexpr cudf::size_type num_cols = 64; template void BM_orc_write_encode(nvbench::state& state, nvbench::type_list>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group(static_cast(DataType)); cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); @@ -90,8 +88,6 @@ void BM_orc_write_io_compression( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), @@ -141,8 +137,6 @@ void BM_orc_write_statistics( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), diff --git a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp index 592eae96362..eda70bc05e6 100644 --- a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp +++ b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,8 +35,6 @@ constexpr int64_t data_size = 512 << 20; void nvbench_orc_write(nvbench::state& state) { - cudf::rmm_pool_raii rmm_pool; - cudf::size_type num_cols = state.get_int64("num_columns"); auto tbl = create_random_table( @@ -79,8 +77,6 @@ void nvbench_orc_write(nvbench::state& state) void nvbench_orc_chunked_write(nvbench::state& state) { - cudf::rmm_pool_raii rmm_pool; - cudf::size_type num_cols = state.get_int64("num_columns"); cudf::size_type num_tables = state.get_int64("num_chunks"); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index fba69cb2b0f..e04dfbbc799 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -62,8 +62,6 @@ void BM_parquet_read_data( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group(static_cast(DataType)); cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); @@ -88,8 +86,6 @@ void BM_parquet_read_io_compression( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), diff --git a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp index 6e187afd6ab..3fd46fa08f2 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp @@ -57,8 +57,6 @@ void BM_parquet_read_options(nvbench::state& state, nvbench::enum_type, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto constexpr str_to_categories = ConvertsStrings == converts_strings::YES; auto constexpr uses_pd_metadata = UsesPandasMetadata == uses_pandas_metadata::YES; diff 
--git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp index a0b076abfda..d3d22e06086 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,8 +46,6 @@ constexpr cudf::size_type num_cols = 64; template void BM_parq_write_encode(nvbench::state& state, nvbench::type_list>) { - cudf::rmm_pool_raii rmm_pool; - auto const data_types = get_type_or_group(static_cast(DataType)); cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); @@ -90,8 +88,6 @@ void BM_parq_write_io_compression( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) { - cudf::rmm_pool_raii rmm_pool; - auto const data_types = get_type_or_group({static_cast(data_type::INTEGRAL), static_cast(data_type::FLOAT), static_cast(data_type::DECIMAL), diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp index 11b29cc2297..ed70f53cad8 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -33,8 +33,6 @@ constexpr int64_t data_size = 512 << 20; void PQ_write(nvbench::state& state) { - cudf::rmm_pool_raii rmm_pool; - cudf::size_type const num_cols = state.get_int64("num_cols"); auto const tbl = create_random_table(cycle_dtypes({cudf::type_id::INT32}, num_cols), @@ -67,8 +65,6 @@ void PQ_write(nvbench::state& state) void PQ_write_chunked(nvbench::state& state) { - cudf::rmm_pool_raii rmm_pool; - cudf::size_type const num_cols = state.get_int64("num_cols"); cudf::size_type const num_tables = state.get_int64("num_chunks"); diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index 261243d29fb..41b5ddb567e 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -116,8 +116,6 @@ template static void bench_multibyte_split(nvbench::state& state, nvbench::type_list>) { - cudf::rmm_pool_raii pool_raii; - auto const delim_size = state.get_int64("delim_size"); auto const delim_percent = state.get_int64("delim_percent"); auto const file_size_approx = state.get_int64("size_approx"); diff --git a/cpp/benchmarks/join/join.cu b/cpp/benchmarks/join/join.cu index 053eb6c2852..647e37aa97d 100644 --- a/cpp/benchmarks/join/join.cu +++ b/cpp/benchmarks/join/join.cu @@ -23,9 +23,6 @@ void nvbench_inner_join(nvbench::state& state, { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, cudf::null_equality compare_nulls, @@ -43,9 +40,6 @@ void nvbench_left_join(nvbench::state& state, { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, cudf::null_equality compare_nulls, @@ -63,9 +57,6 @@ void nvbench_full_join(nvbench::state& state, { skip_helper(state); - // TODO: to be 
replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_input, cudf::table_view const& right_input, cudf::null_equality compare_nulls, diff --git a/cpp/benchmarks/join/mixed_join.cu b/cpp/benchmarks/join/mixed_join.cu index b7da5e2c0b3..1420625bbcd 100644 --- a/cpp/benchmarks/join/mixed_join.cu +++ b/cpp/benchmarks/join/mixed_join.cu @@ -23,9 +23,6 @@ void nvbench_mixed_inner_join( { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -50,9 +47,6 @@ void nvbench_mixed_left_join( { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -77,9 +71,6 @@ void nvbench_mixed_full_join( { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -104,9 +95,6 @@ void nvbench_mixed_left_semi_join( { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& left_conditional_input, @@ -131,9 +119,6 @@ void nvbench_mixed_left_anti_join( { skip_helper(state); - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii pool_raii; - auto join = [](cudf::table_view const& left_equality_input, cudf::table_view const& right_equality_input, cudf::table_view const& 
left_conditional_input, diff --git a/cpp/benchmarks/reduction/distinct_count.cpp b/cpp/benchmarks/reduction/distinct_count.cpp index 489d7935809..d2218c270a8 100644 --- a/cpp/benchmarks/reduction/distinct_count.cpp +++ b/cpp/benchmarks/reduction/distinct_count.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,8 +24,6 @@ template static void bench_reduction_distinct_count(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; - auto const dtype = cudf::type_to_id(); auto const size = static_cast(state.get_int64("num_rows")); auto const null_probability = state.get_float64("null_probability"); diff --git a/cpp/benchmarks/reduction/rank.cpp b/cpp/benchmarks/reduction/rank.cpp index 5022e029d97..41295f787fc 100644 --- a/cpp/benchmarks/reduction/rank.cpp +++ b/cpp/benchmarks/reduction/rank.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,8 +26,6 @@ template static void nvbench_reduction_scan(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; - auto const dtype = cudf::type_to_id(); double const null_probability = state.get_float64("null_probability"); diff --git a/cpp/benchmarks/reduction/scan_structs.cpp b/cpp/benchmarks/reduction/scan_structs.cpp index 92016041c9a..d5b19faf773 100644 --- a/cpp/benchmarks/reduction/scan_structs.cpp +++ b/cpp/benchmarks/reduction/scan_structs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,6 @@ static constexpr cudf::size_type max_str_length = 32; static void nvbench_structs_scan(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; - auto const null_probability = [&] { auto const null_prob_val = state.get_float64("null_probability"); return null_prob_val > 0 ? std::optional{null_prob_val} : std::nullopt; diff --git a/cpp/benchmarks/reduction/segment_reduce.cu b/cpp/benchmarks/reduction/segment_reduce.cu index e063adb25f9..127b3598dae 100644 --- a/cpp/benchmarks/reduction/segment_reduce.cu +++ b/cpp/benchmarks/reduction/segment_reduce.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -86,9 +86,6 @@ template void BM_Simple_Segmented_Reduction(nvbench::state& state, nvbench::type_list>) { - // TODO: to be replaced by nvbench fixture once it's ready - cudf::rmm_pool_raii rmm_pool; - auto const column_size{cudf::size_type(state.get_int64("column_size"))}; auto const num_segments{cudf::size_type(state.get_int64("num_segments"))}; diff --git a/cpp/benchmarks/search/contains.cpp b/cpp/benchmarks/search/contains.cpp index 8daa975d4ed..01a0a37b21a 100644 --- a/cpp/benchmarks/search/contains.cpp +++ b/cpp/benchmarks/search/contains.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,7 +38,6 @@ std::unique_ptr create_column_data(cudf::size_type n_rows, bool ha static void nvbench_contains_scalar(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; using Type = int; auto const has_nulls = static_cast(state.get_int64("has_nulls")); diff --git a/cpp/benchmarks/sort/rank_lists.cpp b/cpp/benchmarks/sort/rank_lists.cpp index f467b639810..49dc409ebfc 100644 --- a/cpp/benchmarks/sort/rank_lists.cpp +++ b/cpp/benchmarks/sort/rank_lists.cpp @@ -26,8 +26,6 @@ template void nvbench_rank_lists(nvbench::state& state, nvbench::type_list>) { - cudf::rmm_pool_raii pool_raii; - auto const table = create_lists_data(state); auto const null_frequency{state.get_float64("null_frequency")}; diff --git a/cpp/benchmarks/sort/rank_structs.cpp b/cpp/benchmarks/sort/rank_structs.cpp index c1e2c5bd7dc..c0227e85191 100644 --- a/cpp/benchmarks/sort/rank_structs.cpp +++ b/cpp/benchmarks/sort/rank_structs.cpp @@ -24,8 +24,6 @@ template void nvbench_rank_structs(nvbench::state& state, nvbench::type_list>) { - cudf::rmm_pool_raii pool_raii; - auto const table = create_structs_data(state); const bool nulls{static_cast(state.get_int64("Nulls"))}; diff --git a/cpp/benchmarks/sort/segmented_sort.cpp b/cpp/benchmarks/sort/segmented_sort.cpp index e3459291caf..22d2b1c4029 100644 --- a/cpp/benchmarks/sort/segmented_sort.cpp +++ b/cpp/benchmarks/sort/segmented_sort.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,8 +26,6 @@ void nvbench_segmented_sort(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; - auto const stable = static_cast(state.get_int64("stable")); auto const dtype = cudf::type_to_id(); auto const size_bytes = static_cast(state.get_int64("size_bytes")); diff --git a/cpp/benchmarks/sort/sort_lists.cpp b/cpp/benchmarks/sort/sort_lists.cpp index 14cc60cbfe7..b55b60f5ec9 100644 --- a/cpp/benchmarks/sort/sort_lists.cpp +++ b/cpp/benchmarks/sort/sort_lists.cpp @@ -22,8 +22,6 @@ void nvbench_sort_lists(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; - auto const table = create_lists_data(state); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp index 22a6780c237..1d54fa42f6f 100644 --- a/cpp/benchmarks/sort/sort_structs.cpp +++ b/cpp/benchmarks/sort/sort_structs.cpp @@ -22,8 +22,6 @@ void nvbench_sort_struct(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; - auto const input = create_structs_data(state); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp index 512554ff1bc..81eafa3044f 100644 --- a/cpp/benchmarks/stream_compaction/distinct.cpp +++ b/cpp/benchmarks/stream_compaction/distinct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,8 +29,6 @@ NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::ti template void nvbench_distinct(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; - cudf::size_type const num_rows = state.get_int64("NumRows"); data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution( @@ -61,8 +59,6 @@ NVBENCH_BENCH_TYPES(nvbench_distinct, NVBENCH_TYPE_AXES(data_type)) template void nvbench_distinct_list(nvbench::state& state, nvbench::type_list) { - cudf::rmm_pool_raii pool_raii; - auto const size = state.get_int64("ColumnSize"); auto const dtype = cudf::type_to_id(); double const null_probability = state.get_float64("null_probability"); diff --git a/cpp/benchmarks/stream_compaction/unique.cpp b/cpp/benchmarks/stream_compaction/unique.cpp index 9a0f4c3b743..dafb9d506c7 100644 --- a/cpp/benchmarks/stream_compaction/unique.cpp +++ b/cpp/benchmarks/stream_compaction/unique.cpp @@ -54,8 +54,6 @@ void nvbench_unique(nvbench::state& state, nvbench::type_list(); double const null_probability = state.get_float64("null_probability"); diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp index de7382f5a75..d86c31480dd 100644 --- a/cpp/benchmarks/string/like.cpp +++ b/cpp/benchmarks/string/like.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -71,7 +71,6 @@ std::unique_ptr build_input_column(cudf::size_type n_rows, int32_t static void bench_like(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; auto const n_rows = static_cast(state.get_int64("num_rows")); auto const hit_rate = static_cast(state.get_int64("hit_rate")); diff --git a/cpp/benchmarks/string/reverse.cpp b/cpp/benchmarks/string/reverse.cpp index 7b08897079b..4c3846c79bb 100644 --- a/cpp/benchmarks/string/reverse.cpp +++ b/cpp/benchmarks/string/reverse.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,6 @@ static void bench_reverse(nvbench::state& state) { - cudf::rmm_pool_raii pool_raii; auto const num_rows = static_cast(state.get_int64("num_rows")); auto const row_width = static_cast(state.get_int64("row_width")); diff --git a/cpp/cmake/thirdparty/get_nvbench.cmake b/cpp/cmake/thirdparty/get_nvbench.cmake new file mode 100644 index 00000000000..3a39e6c7ad1 --- /dev/null +++ b/cpp/cmake/thirdparty/get_nvbench.cmake @@ -0,0 +1,33 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# This function finds nvbench and applies any needed patches. +function(find_and_configure_nvbench) + + include(${rapids-cmake-dir}/cpm/nvbench.cmake) + include(${rapids-cmake-dir}/cpm/package_override.cmake) + + # Find or install NVBench Temporarily force downloading of fmt because current versions of nvbench + # do not support the latest version of fmt, which is automatically pulled into our conda + # environments by mamba. + set(CPM_DOWNLOAD_fmt TRUE) + + set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches") + rapids_cpm_package_override("${cudf_patch_dir}/nvbench_override.json") + + rapids_cpm_nvbench() + +endfunction() + +find_and_configure_nvbench() diff --git a/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff b/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff new file mode 100644 index 00000000000..4e18385f664 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff @@ -0,0 +1,27 @@ +diff --git a/nvbench/main.cuh b/nvbench/main.cuh +index 4c1588c..3ba2b99 100644 +--- a/nvbench/main.cuh ++++ b/nvbench/main.cuh +@@ -54,6 +54,14 @@ + // clang-format on + #endif + ++#ifndef NVBENCH_ENVIRONMENT ++namespace nvbench { ++struct no_environment ++{}; ++} ++#define NVBENCH_ENVIRONMENT nvbench::no_environment ++#endif ++ + #define NVBENCH_MAIN_PARSE(argc, argv) \ + nvbench::option_parser parser; \ + parser.parse(argc, argv) +@@ -77,6 +85,7 @@ + printer.set_total_state_count(total_states); \ + \ + printer.set_completed_state_count(0); \ ++ NVBENCH_ENVIRONMENT{}; \ + for (auto &bench_ptr : benchmarks) \ + { \ + bench_ptr->set_printer(printer); \ diff --git a/cpp/cmake/thirdparty/patches/nvbench_override.json b/cpp/cmake/thirdparty/patches/nvbench_override.json new file mode 100644 index 00000000000..ad9b19c29c1 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/nvbench_override.json @@ -0,0 +1,14 @@ + +{ + "packages" : { + "nvbench" : { + "patches" : 
[ + { + "file" : "${current_json_dir}/nvbench_global_setup.diff", + "issue" : "Fix add support for global setup to initialize RMM in nvbench [https://github.com/NVIDIA/nvbench/pull/123]", + "fixed_in" : "" + } + ] + } + } +} From 1c0224f15513709d5a6aab59c3503c0a6af59835 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 10 Feb 2023 14:13:20 -0800 Subject: [PATCH 24/24] Reenable stream identification library in CI (#12714) This PR reenables the preload library introduced for verifying stream usage in libcudf in #11875. This library was disabled during the GitHub Actions migration. Authors: - Vyas Ramasubramani (https://github.com/vyasr) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Yunsong Wang (https://github.com/PointKernel) - AJ Schmidt (https://github.com/ajschmidt8) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12714 --- ci/test_cpp.sh | 38 ++++++------ cpp/CMakeLists.txt | 42 ++++++++++++- cpp/tests/CMakeLists.txt | 13 ++++ .../test_default_stream_identification.cu | 4 +- .../identify_stream_usage.cpp | 2 +- .../identify_stream_usage/CMakeLists.txt | 60 ------------------- python/cudf/CMakeLists.txt | 1 + 7 files changed, 78 insertions(+), 82 deletions(-) rename cpp/tests/{utilities => }/identify_stream_usage/test_default_stream_identification.cu (93%) rename cpp/tests/utilities/{identify_stream_usage => }/identify_stream_usage.cpp (99%) delete mode 100644 cpp/tests/utilities/identify_stream_usage/CMakeLists.txt diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index b3d7919b279..d6681881419 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -35,13 +35,10 @@ EXITCODE=0 trap "EXITCODE=1" ERR set +e -# TODO: Disabling stream identification for now. -# Set up library for finding incorrect default stream usage. -#pushd "cpp/tests/utilities/identify_stream_usage/" -#mkdir build && cd build && cmake .. 
-GNinja && ninja && ninja test -#STREAM_IDENTIFY_LIB="$(realpath build/libidentify_stream_usage.so)" -#echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB}" -#popd +# Get library for finding incorrect default stream usage. +STREAM_IDENTIFY_LIB="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage.so" + +echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB}" # Run libcudf and libcudf_kafka gtests from libcudf-tests package rapids-logger "Run gtests" @@ -51,17 +48,22 @@ rapids-logger "Run gtests" for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do test_name=$(basename ${gt}) echo "Running gtest $test_name" - ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} - # TODO: Disabling stream identification for now. - #if [[ ${test_name} == "SPAN_TEST" ]]; then - # # This one test is specifically designed to test using a thrust device - # # vector, so we expect and allow it to include default stream usage. - # gtest_filter="SpanTest.CanConstructFromDeviceContainers" - # GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="-${gtest_filter}" && \ - # ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="${gtest_filter}" - #else - # GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} - #fi + + # TODO: This strategy for using the stream lib will need to change when we + # switch to invoking ctest. For one, we will want to set the test + # properties to use the lib (which means that the decision will be made at + # CMake-configure time instead of runtime). We may also need to leverage + # something like gtest_discover_tests to be able to filter on the + # underlying test names. + if [[ ${test_name} == "SPAN_TEST" ]]; then + # This one test is specifically designed to test using a thrust device + # vector, so we expect and allow it to include default stream usage. 
+ gtest_filter="SpanTest.CanConstructFromDeviceContainers" + GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="-${gtest_filter}" && \ + ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="${gtest_filter}" + else + GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} + fi done if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a635e655c39..d402a47628c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -71,6 +71,18 @@ option(CUDA_ENABLE_LINEINFO option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON) # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) + +set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON) +if(${CUDA_STATIC_RUNTIME}) + set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL OFF) +endif() +option( + CUDF_BUILD_STREAMS_TEST_UTIL + "Whether to build the utilities for stream testing contained in libcudf" + ${DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL} +) +mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL) + option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF) mark_as_advanced(USE_LIBARROW_FROM_PYARROW) @@ -754,10 +766,34 @@ if(CUDF_BUILD_TESTUTIL) cudftestutil PUBLIC "$" "$" ) - add_library(cudf::cudftestutil ALIAS cudftestutil) endif() + +# * build cudf_identify_stream_usage -------------------------------------------------------------- + +if(CUDF_BUILD_STREAMS_TEST_UTIL) + if(CUDA_STATIC_RUNTIME) + message( + FATAL_ERROR + "Stream identification cannot be used with a static CUDA runtime. Please set CUDA_STATIC_RUNTIME=OFF or CUDF_BUILD_STREAMS_TEST_UTIL=OFF." + ) + endif() + + # Libraries for stream-related testing. 
+ add_library(cudf_identify_stream_usage SHARED tests/utilities/identify_stream_usage.cpp) + + set_target_properties( + cudf_identify_stream_usage + PROPERTIES # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + ) + target_link_libraries(cudf_identify_stream_usage PUBLIC CUDA::cudart rmm::rmm) + add_library(cudf::cudf_identify_stream_usage ALIAS cudf_identify_stream_usage) +endif() + # ################################################################################################## # * add tests ------------------------------------------------------------------------------------- @@ -830,6 +866,10 @@ if(CUDF_BUILD_TESTUTIL) ) endif() +if(CUDF_BUILD_STREAMS_TEST_UTIL) + install(TARGETS cudf_identify_stream_usage DESTINATION ${lib_dir}) +endif() + set(doc_string [=[ Provide targets for the cudf library. diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 053acafdd3d..83a1c14438b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -508,6 +508,19 @@ ConfigureTest( # * bin tests ---------------------------------------------------------------------------------- ConfigureTest(LABEL_BINS_TEST labeling/label_bins_tests.cpp) +# ################################################################################################## +# * stream identification tests ------------------------------------------------------------------- +ConfigureTest( + STREAM_IDENTIFICATION_TEST identify_stream_usage/test_default_stream_identification.cu +) +# Note that this only works when the test is invoked via ctest. At the moment CI is running all +# tests by manually invoking the executable, so we'll have to manually pass this environment +# variable in that setup. 
+set_tests_properties( + STREAM_IDENTIFICATION_TEST PROPERTIES ENVIRONMENT + LD_PRELOAD=$ +) + # ################################################################################################## # enable testing ################################################################################ # ################################################################################################## diff --git a/cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu b/cpp/tests/identify_stream_usage/test_default_stream_identification.cu similarity index 93% rename from cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu rename to cpp/tests/identify_stream_usage/test_default_stream_identification.cu index 022244b148b..28bb47af40d 100644 --- a/cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu +++ b/cpp/tests/identify_stream_usage/test_default_stream_identification.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ void test_cudaLaunchKernel() try { kernel<<<1, 1>>>(); - } catch (std::runtime_error) { + } catch (std::runtime_error&) { return; } throw std::runtime_error("No exception raised for kernel on default stream!"); diff --git a/cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp b/cpp/tests/utilities/identify_stream_usage.cpp similarity index 99% rename from cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp rename to cpp/tests/utilities/identify_stream_usage.cpp index 4a1a8f04791..87301a7d49d 100644 --- a/cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp +++ b/cpp/tests/utilities/identify_stream_usage.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt b/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt deleted file mode 100644 index 89f40303550..00000000000 --- a/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt +++ /dev/null @@ -1,60 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. 
-# ============================================================================= - -cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR) - -if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/RAPIDS.cmake - ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake - ) -endif() -include(${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake) - -project( - IDENTIFY_STREAM_USAGE - VERSION 0.0.1 - LANGUAGES CXX CUDA -) - -include(rapids-cpm) -include(${rapids-cmake-dir}/cpm/rmm.cmake) -rapids_cpm_init() -rapids_cpm_rmm() - -set(CMAKE_CUDA_RUNTIME_LIBRARY SHARED) -add_library(identify_stream_usage SHARED identify_stream_usage.cpp) - -find_package(CUDAToolkit REQUIRED) - -set_target_properties(identify_stream_usage PROPERTIES CUDA_RUNTIME_LIBRARY SHARED) -target_link_libraries(identify_stream_usage PUBLIC CUDA::cudart rmm::rmm) - -set_target_properties( - identify_stream_usage - PROPERTIES # set target compile options - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON -) - -# Add the test file. -include(CTest) - -add_executable(Tests test_default_stream_identification.cu) -add_test(NAME default_stream_identification COMMAND Tests) - -set_tests_properties( - default_stream_identification PROPERTIES ENVIRONMENT - LD_PRELOAD=$ -) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 5223bc0a5c7..7457b770b13 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -89,6 +89,7 @@ if(NOT cudf_FOUND) # We don't build C++ tests when building wheels, so we can also omit the test util and shrink # the wheel by avoiding embedding GTest. set(CUDF_BUILD_TESTUTIL OFF) + set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) # Statically link cudart if building wheels set(CUDA_STATIC_RUNTIME ON)