From b87b64f1b1bc00b36e83f05187a2eeaaf1229029 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 7 Feb 2023 20:11:07 -0500 Subject: [PATCH 01/10] Convert `rank` to use to experimental row comparators (#12481) Converts the `rank` function to use experimental row comparators, which support list and struct types. Part of #11844. [Throughput benchmarks](https://github.com/rapidsai/cudf/pull/12481#issuecomment-1416384229) are available below. It seems like when `size_bytes` is constrained, the generator generates fewer rows in `list` types for increasing depths. That's why, `depth=4` has a higher throughput than `depth=1` because the number of leaf elements generated are the same, but with much fewer rows. Authors: - Divye Gala (https://github.com/divyegala) - Jordan Jacobelli (https://github.com/Ethyling) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/12481 --- cpp/benchmarks/CMakeLists.txt | 5 +- cpp/benchmarks/sort/nested_types_common.hpp | 85 +++ cpp/benchmarks/sort/rank.cpp | 4 +- cpp/benchmarks/sort/rank_lists.cpp | 49 ++ cpp/benchmarks/sort/rank_structs.cpp | 47 ++ cpp/benchmarks/sort/rank_types_common.hpp | 52 ++ cpp/benchmarks/sort/sort_lists.cpp | 16 +- cpp/benchmarks/sort/sort_structs.cpp | 52 +- cpp/src/sort/rank.cu | 39 +- cpp/tests/sort/rank_test.cpp | 556 ++++++++++++++++++-- 10 files changed, 776 insertions(+), 129 deletions(-) create mode 100644 cpp/benchmarks/sort/nested_types_common.hpp create mode 100644 cpp/benchmarks/sort/rank_lists.cpp create mode 100644 cpp/benchmarks/sort/rank_structs.cpp create mode 100644 cpp/benchmarks/sort/rank_types_common.hpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 6f67cb32b0a..c5ae3345da5 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -169,7 +169,10 @@ ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp) # ################################################################################################## # * sort benchmark -------------------------------------------------------------------------------- ConfigureBench(SORT_BENCH sort/rank.cpp sort/sort.cpp sort/sort_strings.cpp) -ConfigureNVBench(SORT_NVBENCH sort/segmented_sort.cpp sort/sort_lists.cpp sort/sort_structs.cpp) +ConfigureNVBench( + SORT_NVBENCH sort/rank_lists.cpp sort/rank_structs.cpp sort/segmented_sort.cpp + sort/sort_lists.cpp sort/sort_structs.cpp +) # ################################################################################################## # * quantiles benchmark diff --git a/cpp/benchmarks/sort/nested_types_common.hpp b/cpp/benchmarks/sort/nested_types_common.hpp new file mode 100644 index 00000000000..c4851823534 --- /dev/null +++ b/cpp/benchmarks/sort/nested_types_common.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include + +#include + +#include + +inline std::unique_ptr create_lists_data(nvbench::state& state) +{ + const size_t size_bytes(state.get_int64("size_bytes")); + const cudf::size_type depth{static_cast(state.get_int64("depth"))}; + auto const null_frequency{state.get_float64("null_frequency")}; + + data_profile table_profile; + table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5); + table_profile.set_list_depth(depth); + table_profile.set_null_probability(null_frequency); + return create_random_table({cudf::type_id::LIST}, table_size_bytes{size_bytes}, table_profile); +} + +inline std::unique_ptr create_structs_data(nvbench::state& state, + cudf::size_type const n_cols = 1) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + const cudf::size_type n_rows{static_cast(state.get_int64("NumRows"))}; + const cudf::size_type depth{static_cast(state.get_int64("Depth"))}; + const bool nulls{static_cast(state.get_int64("Nulls"))}; + + // Create columns with values in the range [0,100) + std::vector columns; + columns.reserve(n_cols); + std::generate_n(std::back_inserter(columns), n_cols, [&]() { + auto const elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + if (!nulls) return column_wrapper(elements, elements + n_rows); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; }); + return column_wrapper(elements, elements + n_rows, valids); + }); + + std::vector> cols; + std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { + return col.release(); + }); + + std::vector> child_cols = std::move(cols); + // Nest the child columns in a struct, then nest that struct column inside another + // struct column up to the desired depth + for (int i = 0; i < depth; i++) { + std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 100 * (i + 1)); + std::generate_n( + std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); + cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); + child_cols = std::vector>{}; + child_cols.push_back(struct_col.release()); + } + + // Create table view + return std::make_unique(std::move(child_cols)); +} diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp index 2c26f4fa15d..6d0a8e5aedd 100644 --- a/cpp/benchmarks/sort/rank.cpp +++ b/cpp/benchmarks/sort/rank.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ static void BM_rank(benchmark::State& state, bool nulls) // Create columns with values in the range [0,100) data_profile profile = data_profile_builder().cardinality(0).distribution( cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); - profile.set_null_probability(nulls ? std::optional{0.01} : std::nullopt); + profile.set_null_probability(nulls ? 
std::optional{0.2} : std::nullopt); auto keys = create_random_column(cudf::type_to_id(), row_count{n_rows}, profile); for (auto _ : state) { diff --git a/cpp/benchmarks/sort/rank_lists.cpp b/cpp/benchmarks/sort/rank_lists.cpp new file mode 100644 index 00000000000..f467b639810 --- /dev/null +++ b/cpp/benchmarks/sort/rank_lists.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nested_types_common.hpp" +#include "rank_types_common.hpp" + +#include + +#include + +#include + +template +void nvbench_rank_lists(nvbench::state& state, nvbench::type_list>) +{ + cudf::rmm_pool_raii pool_raii; + + auto const table = create_lists_data(state); + + auto const null_frequency{state.get_float64("null_frequency")}; + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::rank(table->view().column(0), + method, + cudf::order::ASCENDING, + null_frequency ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_rank_lists, NVBENCH_TYPE_AXES(methods)) + .set_name("rank_lists") + .add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28}) + .add_int64_axis("depth", {1, 4}) + .add_float64_axis("null_frequency", {0, 0.2}); diff --git a/cpp/benchmarks/sort/rank_structs.cpp b/cpp/benchmarks/sort/rank_structs.cpp new file mode 100644 index 00000000000..c1e2c5bd7dc --- /dev/null +++ b/cpp/benchmarks/sort/rank_structs.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nested_types_common.hpp" +#include "rank_types_common.hpp" + +#include + +#include + +template +void nvbench_rank_structs(nvbench::state& state, nvbench::type_list>) +{ + cudf::rmm_pool_raii pool_raii; + + auto const table = create_structs_data(state); + + const bool nulls{static_cast(state.get_int64("Nulls"))}; + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::rank(table->view().column(0), + method, + cudf::order::ASCENDING, + nulls ? 
cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_rank_structs, NVBENCH_TYPE_AXES(methods)) + .set_name("rank_structs") + .add_int64_power_of_two_axis("NumRows", {10, 18, 26}) + .add_int64_axis("Depth", {0, 1, 8}) + .add_int64_axis("Nulls", {0, 1}); diff --git a/cpp/benchmarks/sort/rank_types_common.hpp b/cpp/benchmarks/sort/rank_types_common.hpp new file mode 100644 index 00000000000..adb58606c42 --- /dev/null +++ b/cpp/benchmarks/sort/rank_types_common.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +enum class rank_method : int32_t {}; + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::rank_method, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "FIRST"; + case cudf::rank_method::AVERAGE: return "AVERAGE"; + case cudf::rank_method::MIN: return "MIN"; + case cudf::rank_method::MAX: return "MAX"; + case cudf::rank_method::DENSE: return "DENSE"; + default: return "unknown"; + } + }, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "cudf::rank_method::FIRST"; + case cudf::rank_method::AVERAGE: return "cudf::rank_method::AVERAGE"; + case cudf::rank_method::MIN: return "cudf::rank_method::MIN"; + case cudf::rank_method::MAX: return "cudf::rank_method::MAX"; + case cudf::rank_method::DENSE: return "cudf::rank_method::DENSE"; + default: return "unknown"; + } + }) + +using methods = nvbench::enum_type_list; diff --git a/cpp/benchmarks/sort/sort_lists.cpp b/cpp/benchmarks/sort/sort_lists.cpp index dac865de479..14cc60cbfe7 100644 --- a/cpp/benchmarks/sort/sort_lists.cpp +++ b/cpp/benchmarks/sort/sort_lists.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,7 @@ * limitations under the License. 
*/ -#include -#include +#include "nested_types_common.hpp" #include @@ -25,16 +24,7 @@ void nvbench_sort_lists(nvbench::state& state) { cudf::rmm_pool_raii pool_raii; - const size_t size_bytes(state.get_int64("size_bytes")); - const cudf::size_type depth{static_cast(state.get_int64("depth"))}; - auto const null_frequency{state.get_float64("null_frequency")}; - - data_profile table_profile; - table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5); - table_profile.set_list_depth(depth); - table_profile.set_null_probability(null_frequency); - auto const table = - create_random_table({cudf::type_id::LIST}, table_size_bytes{size_bytes}, table_profile); + auto const table = create_lists_data(state); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp index 9b6c32940f5..22a6780c237 100644 --- a/cpp/benchmarks/sort/sort_structs.cpp +++ b/cpp/benchmarks/sort/sort_structs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,65 +14,21 @@ * limitations under the License. */ -#include - -#include +#include "nested_types_common.hpp" #include #include -#include - void nvbench_sort_struct(nvbench::state& state) { cudf::rmm_pool_raii pool_raii; - using Type = int; - using column_wrapper = cudf::test::fixed_width_column_wrapper; - std::default_random_engine generator; - std::uniform_int_distribution distribution(0, 100); - - const cudf::size_type n_rows{static_cast(state.get_int64("NumRows"))}; - const cudf::size_type n_cols{1}; - const cudf::size_type depth{static_cast(state.get_int64("Depth"))}; - const bool nulls{static_cast(state.get_int64("Nulls"))}; - - // Create columns with values in the range [0,100) - std::vector columns; - columns.reserve(n_cols); - std::generate_n(std::back_inserter(columns), n_cols, [&]() { - auto const elements = cudf::detail::make_counting_transform_iterator( - 0, [&](auto row) { return distribution(generator); }); - if (!nulls) return column_wrapper(elements, elements + n_rows); - auto valids = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; }); - return column_wrapper(elements, elements + n_rows, valids); - }); - - std::vector> cols; - std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { - return col.release(); - }); - - std::vector> child_cols = std::move(cols); - // Lets add some layers - for (int i = 0; i < depth; i++) { - std::vector struct_validity; - std::uniform_int_distribution bool_distribution(0, 100 * (i + 1)); - std::generate_n( - std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); - cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); - child_cols = std::vector>{}; - child_cols.push_back(struct_col.release()); - } - - // Create table view - auto const input = cudf::table(std::move(child_cols)); + auto const input = create_structs_data(state); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; - cudf::detail::sorted_order(input, {}, {}, stream_view, rmm::mr::get_current_device_resource()); + cudf::detail::sorted_order(*input, {}, {}, 
stream_view, rmm::mr::get_current_device_resource()); }); } diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index 99e99704c10..461e978643f 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -47,37 +47,26 @@ namespace cudf { namespace detail { namespace { -// Functor to identify unique elements in a sorted order table/column -template -struct unique_comparator { - unique_comparator(table_device_view device_table, Iterator const sorted_order, bool has_nulls) - : comparator(nullate::DYNAMIC{has_nulls}, device_table, device_table, null_equality::EQUAL), - permute(sorted_order) - { - } - __device__ ReturnType operator()(size_type index) const noexcept - { - return index == 0 || not comparator(permute[index], permute[index - 1]); - }; - - private: - row_equality_comparator comparator; - Iterator const permute; -}; // Assign rank from 1 to n unique values. Equal values get same rank value. rmm::device_uvector sorted_dense_rank(column_view input_col, column_view sorted_order_view, rmm::cuda_stream_view stream) { - auto device_table = table_device_view::create(table_view{{input_col}}, stream); + auto const t_input = table_view{{input_col}}; + auto const comparator = cudf::experimental::row::equality::self_comparator{t_input, stream}; + auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); + + auto const sorted_index_order = thrust::make_permutation_iterator( + sorted_order_view.begin(), thrust::make_counting_iterator(0)); + auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { + return static_cast(index == 0 || + not device_comparator(permute[index], permute[index - 1])); + }; + auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); + auto const input_size = input_col.size(); rmm::device_uvector dense_rank_sorted(input_size, stream); - auto sorted_index_order = thrust::make_permutation_iterator( - sorted_order_view.begin(), thrust::make_counting_iterator(0)); - auto conv = unique_comparator( - *device_table, sorted_index_order, input_col.has_nulls()); - auto unique_it = cudf::detail::make_counting_transform_iterator(0, conv); thrust::inclusive_scan( rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); diff --git a/cpp/tests/sort/rank_test.cpp b/cpp/tests/sort/rank_test.cpp index 8461b0a1984..2722c1dfdad 100644 --- a/cpp/tests/sort/rank_test.cpp +++ b/cpp/tests/sort/rank_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,13 @@ #include #include +template +using lists_col = cudf::test::lists_column_wrapper; +using structs_col = cudf::test::structs_column_wrapper; + +using cudf::test::iterators::null_at; +using cudf::test::iterators::nulls_at; + namespace { void run_rank_test(cudf::table_view input, cudf::table_view expected, @@ -50,10 +58,9 @@ void run_rank_test(cudf::table_view input, } using input_arg_t = std::tuple; -input_arg_t asce_keep{cudf::order::ASCENDING, cudf::null_policy::EXCLUDE, cudf::null_order::AFTER}; -input_arg_t asce_top{cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::BEFORE}; -input_arg_t asce_bottom{ - 
cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::AFTER}; +input_arg_t asc_keep{cudf::order::ASCENDING, cudf::null_policy::EXCLUDE, cudf::null_order::AFTER}; +input_arg_t asc_top{cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::BEFORE}; +input_arg_t asc_bottom{cudf::order::ASCENDING, cudf::null_policy::INCLUDE, cudf::null_order::AFTER}; input_arg_t desc_keep{ cudf::order::DESCENDING, cudf::null_policy::EXCLUDE, cudf::null_order::BEFORE}; @@ -105,7 +112,7 @@ TYPED_TEST_SUITE(Rank, cudf::test::NumericTypes); // fixed_width_column_wrapper col1{{ 5, 4, 3, 5, 8, 5}}; // 3, 2, 1, 4, 6, 5 -TYPED_TEST(Rank, first_asce_keep) +TYPED_TEST(Rank, first_asc_keep) { // ASCENDING cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 4, 6, 5}}; @@ -113,25 +120,25 @@ TYPED_TEST(Rank, first_asce_keep) {1, 1, 0, 1, 1, 1}}; // KEEP cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 3, 6, 4}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::FIRST, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::FIRST, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, first_asce_top) +TYPED_TEST(Rank, first_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 4, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{ {3, 2, 1, 4, 6, 5}}; // BEFORE = TOP cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 3, 6, 4}}; - this->run_all_tests(cudf::rank_method::FIRST, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::FIRST, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, first_asce_bottom) +TYPED_TEST(Rank, first_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 4, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{ {2, 1, 6, 3, 5, 4}}; // AFTER = BOTTOM cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 3, 6, 4}}; - this->run_all_tests(cudf::rank_method::FIRST, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::FIRST, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, first_desc_keep) @@ -163,30 +170,30 @@ TYPED_TEST(Rank, first_desc_bottom) this->run_all_tests(cudf::rank_method::FIRST, desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, dense_asce_keep) +TYPED_TEST(Rank, dense_asc_keep) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 4, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, -1, 2, 3, 2}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 3, 1, 2, 4, 2}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::DENSE, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, dense_asce_top) +TYPED_TEST(Rank, dense_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 4, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 2, 1, 3, 4, 3}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 3, 1, 2, 4, 2}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, dense_asce_bottom) +TYPED_TEST(Rank, dense_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 4, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, 4, 2, 3, 2}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 3, 
1, 2, 4, 2}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, dense_desc_keep) @@ -215,30 +222,30 @@ TYPED_TEST(Rank, dense_desc_bottom) this->run_all_tests(cudf::rank_method::DENSE, desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, min_asce_keep) +TYPED_TEST(Rank, min_asc_keep) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, -1, 2, 5, 2}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 2, 6, 2}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::MIN, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MIN, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, min_asce_top) +TYPED_TEST(Rank, min_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 2, 6, 2}}; - this->run_all_tests(cudf::rank_method::MIN, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MIN, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, min_asce_bottom) +TYPED_TEST(Rank, min_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{3, 2, 1, 3, 6, 3}}; cudf::test::fixed_width_column_wrapper col2_rank{{2, 1, 6, 2, 5, 2}}; cudf::test::fixed_width_column_wrapper col3_rank{{2, 5, 1, 2, 6, 2}}; - this->run_all_tests(cudf::rank_method::MIN, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MIN, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, min_desc_keep) @@ -267,30 +274,30 @@ TYPED_TEST(Rank, min_desc_bottom) this->run_all_tests(cudf::rank_method::MIN, desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, max_asce_keep) +TYPED_TEST(Rank, max_asc_keep) { cudf::test::fixed_width_column_wrapper col1_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{{4, 1, -1, 4, 5, 4}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{4, 5, 1, 4, 6, 4}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::MAX, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MAX, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, max_asce_top) +TYPED_TEST(Rank, max_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col3_rank{{4, 5, 1, 4, 6, 4}}; - this->run_all_tests(cudf::rank_method::MAX, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MAX, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, max_asce_bottom) +TYPED_TEST(Rank, max_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{5, 2, 1, 5, 6, 5}}; cudf::test::fixed_width_column_wrapper col2_rank{{4, 1, 6, 4, 5, 4}}; cudf::test::fixed_width_column_wrapper col3_rank{{4, 5, 1, 4, 6, 4}}; - this->run_all_tests(cudf::rank_method::MAX, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::MAX, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, max_desc_keep) @@ -319,28 +326,28 @@ TYPED_TEST(Rank, max_desc_bottom) this->run_all_tests(cudf::rank_method::MAX, 
desc_bottom, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, average_asce_keep) +TYPED_TEST(Rank, average_asc_keep) { cudf::test::fixed_width_column_wrapper col1_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 1, -1, 3, 5, 3}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{3, 5, 1, 3, 6, 3}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::AVERAGE, asce_keep, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::AVERAGE, asc_keep, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, average_asce_top) +TYPED_TEST(Rank, average_asc_top) { cudf::test::fixed_width_column_wrapper col1_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col2_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col3_rank{{3, 5, 1, 3, 6, 3}}; - this->run_all_tests(cudf::rank_method::AVERAGE, asce_top, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::AVERAGE, asc_top, col1_rank, col2_rank, col3_rank); } -TYPED_TEST(Rank, average_asce_bottom) +TYPED_TEST(Rank, average_asc_bottom) { cudf::test::fixed_width_column_wrapper col1_rank{{4, 2, 1, 4, 6, 4}}; cudf::test::fixed_width_column_wrapper col2_rank{{3, 1, 6, 3, 5, 3}}; cudf::test::fixed_width_column_wrapper col3_rank{{3, 5, 1, 3, 6, 3}}; - this->run_all_tests(cudf::rank_method::AVERAGE, asce_bottom, col1_rank, col2_rank, col3_rank); + this->run_all_tests(cudf::rank_method::AVERAGE, asc_bottom, col1_rank, col2_rank, col3_rank); } TYPED_TEST(Rank, average_desc_keep) @@ -368,30 +375,30 @@ TYPED_TEST(Rank, average_desc_bottom) } // percentage==true (dense, not-dense) -TYPED_TEST(Rank, dense_asce_keep_pct) +TYPED_TEST(Rank, dense_asc_keep_pct) { cudf::test::fixed_width_column_wrapper col1_rank{{0.75, 0.5, 0.25, 0.75, 1., 0.75}}; cudf::test::fixed_width_column_wrapper col2_rank{ {2.0 / 3.0, 1.0 / 3.0, -1., 2.0 / 3.0, 1., 2.0 / 3.0}, {1, 1, 0, 1, 1, 1}}; cudf::test::fixed_width_column_wrapper col3_rank{{0.5, 0.75, 0.25, 0.5, 1., 0.5}, {1, 1, 1, 1, 1, 1}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_keep, col1_rank, col2_rank, col3_rank, true); + this->run_all_tests(cudf::rank_method::DENSE, asc_keep, col1_rank, col2_rank, col3_rank, true); } -TYPED_TEST(Rank, dense_asce_top_pct) +TYPED_TEST(Rank, dense_asc_top_pct) { cudf::test::fixed_width_column_wrapper col1_rank{{0.75, 0.5, 0.25, 0.75, 1., 0.75}}; cudf::test::fixed_width_column_wrapper col2_rank{{0.75, 0.5, 0.25, 0.75, 1., 0.75}}; cudf::test::fixed_width_column_wrapper col3_rank{{0.5, 0.75, 0.25, 0.5, 1., 0.5}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_top, col1_rank, col2_rank, col3_rank, true); + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col1_rank, col2_rank, col3_rank, true); } -TYPED_TEST(Rank, dense_asce_bottom_pct) +TYPED_TEST(Rank, dense_asc_bottom_pct) { cudf::test::fixed_width_column_wrapper col1_rank{{0.75, 0.5, 0.25, 0.75, 1., 0.75}}; cudf::test::fixed_width_column_wrapper col2_rank{{0.5, 0.25, 1., 0.5, 0.75, 0.5}}; cudf::test::fixed_width_column_wrapper col3_rank{{0.5, 0.75, 0.25, 0.5, 1., 0.5}}; - this->run_all_tests(cudf::rank_method::DENSE, asce_bottom, col1_rank, col2_rank, col3_rank, true); + this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col1_rank, col2_rank, col3_rank, true); } TYPED_TEST(Rank, min_desc_keep_pct) @@ -444,3 +451,472 @@ TEST_F(RankLarge, average_large) cudf::test::fixed_width_column_wrapper expected(iter + 1, iter + 10559); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); } + +template 
+struct RankListAndStruct : public cudf::test::BaseFixture { + void run_all_tests(cudf::rank_method method, + input_arg_t input_arg, + cudf::column_view const list_rank, + cudf::column_view const struct_rank, + bool percentage = false) + { + if constexpr (std::is_same_v) { return; } + /* + [ + [], + [1], + [2, 2], + [2, 3], + [2, 2], + [1], + [], + NULL + [2], + NULL, + [1] + ] + */ + auto list_col = + lists_col{{{}, {1}, {2, 2}, {2, 3}, {2, 2}, {1}, {}, {} /*NULL*/, {2}, {} /*NULL*/, {1}}, + nulls_at({7, 9})}; + + // clang-format off + /* + +------------+ + | s| + +------------+ + 0 | {0, null}| + 1 | {1, null}| + 2 | null| + 3 |{null, null}| + 4 | null| + 5 |{null, null}| + 6 | {null, 1}| + 7 | {null, 0}| + +------------+ + */ + std::vector struct_valids{1, 1, 0, 1, 0, 1, 1, 1}; + auto col1 = cudf::test::fixed_width_column_wrapper{{ 0, 1, 9, -1, 9, -1, -1, -1}, {1, 1, 1, 0, 1, 0, 0, 0}}; + auto col2 = cudf::test::fixed_width_column_wrapper{{-1, -1, 9, -1, 9, -1, 1, 0}, {0, 0, 1, 0, 1, 0, 1, 1}}; + auto struct_col = cudf::test::structs_column_wrapper{{col1, col2}, struct_valids}.release(); + // clang-format on + + for (auto const& test_case : { + // Non-null column + test_case_t{cudf::table_view{{list_col}}, cudf::table_view{{list_rank}}}, + // Null column + test_case_t{cudf::table_view{{struct_col->view()}}, cudf::table_view{{struct_rank}}}, + }) { + auto [input, output] = test_case; + + run_rank_test(input, + output, + method, + std::get<0>(input_arg), + std::get<1>(input_arg), + std::get<2>(input_arg), + percentage); + } + } +}; + +TYPED_TEST_SUITE(RankListAndStruct, cudf::test::NumericTypes); + +TYPED_TEST(RankListAndStruct, first_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper list_rank{ + {1, 3, 7, 9, 8, 4, 2, -1, 6, -1, 5}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 5, -1, 6, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::FIRST, asc_keep, list_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 3, 5, 9, 11, 10, 6, 4, 1, 8, 2, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{7, 8, 1, 3, 2, 4, 6, 5}; + this->run_all_tests(cudf::rank_method::FIRST, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 1, 3, 7, 9, 8, 4, 2, 10, 6, 11, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 7, 5, 8, 6, 4, 3}; + this->run_all_tests(cudf::rank_method::FIRST, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {8, 5, 2, 1, 3, 6, 9, -1, 4, -1, 7}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 5, -1, 6, 3, 4}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::FIRST, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 10, 7, 4, 3, 5, 8, 11, 1, 6, 2, 9}; + cudf::test::fixed_width_column_wrapper struct_rank{8, 7, 1, 3, 2, 4, 5, 6}; + this->run_all_tests(cudf::rank_method::FIRST, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, first_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + 
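  // How this expectation follows from the list fixture above: with rank_method::FIRST,
  // DESCENDING order and both null rows ranked last, the rows order as
  //   [2,3], [2,2], [2,2], [2], [1], [1], [1], [], [], null, null
  // (ties broken by input position), which in input order yields
  //   8, 5, 2, 1, 3, 6, 9, 10, 4, 11, 7.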
cudf::test::fixed_width_column_wrapper col_rank{ + 8, 5, 2, 1, 3, 6, 9, 10, 4, 11, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 7, 5, 8, 6, 3, 4}; + this->run_all_tests(cudf::rank_method::FIRST, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {1, 2, 4, 5, 4, 2, 1, -1, 3, -1, 2}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 5, -1, 5, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::DENSE, asc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{2, 3, 5, 6, 5, 3, 2, 1, 4, 1, 3}; + cudf::test::fixed_width_column_wrapper struct_rank{5, 6, 1, 2, 1, 2, 4, 3}; + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{1, 2, 4, 5, 4, 2, 1, 6, 3, 6, 2}; + cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 6, 5, 6, 5, 4, 3}; + this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {5, 4, 2, 1, 2, 4, 5, -1, 3, -1, 4}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 5, -1, 5, 3, 4}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::DENSE, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{6, 5, 3, 2, 3, 5, 6, 1, 4, 1, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{6, 5, 1, 2, 1, 2, 3, 4}; + this->run_all_tests(cudf::rank_method::DENSE, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{5, 4, 2, 1, 2, 4, 5, 6, 3, 6, 4}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 6, 5, 6, 5, 3, 4}; + this->run_all_tests(cudf::rank_method::DENSE, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {1, 3, 7, 9, 7, 3, 1, -1, 6, -1, 3}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 5, -1, 5, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MIN, asc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 3, 5, 9, 11, 9, 5, 3, 1, 8, 1, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{7, 8, 1, 3, 1, 3, 6, 5}; + this->run_all_tests(cudf::rank_method::MIN, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 1, 3, 7, 9, 7, 3, 1, 10, 6, 10, 3}; + cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 7, 5, 7, 5, 4, 3}; + this->run_all_tests(cudf::rank_method::MIN, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_desc_keep) +{ + // DESCENDING and null_order::BEFORE + 
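  // With rank_method::MIN every row of a tie group takes the smallest FIRST rank of that
  // group, and null_policy::EXCLUDE leaves the two null rows unranked. Descending, the
  // groups [2,3], {[2,2],[2,2]}, [2], {[1],[1],[1]}, {[],[]} start at ranks 1, 2, 4, 5, 8,
  // giving 8, 5, 2, 1, 2, 5, 8, null, 4, null, 5 in input order.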
cudf::test::fixed_width_column_wrapper col_rank{ + {8, 5, 2, 1, 2, 5, 8, -1, 4, -1, 5}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 5, -1, 5, 3, 4}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MIN, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 10, 7, 4, 3, 4, 7, 10, 1, 6, 1, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{8, 7, 1, 3, 1, 3, 5, 6}; + this->run_all_tests(cudf::rank_method::MIN, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, min_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 8, 5, 2, 1, 2, 5, 8, 10, 4, 10, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 7, 5, 7, 5, 3, 4}; + this->run_all_tests(cudf::rank_method::MIN, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {2, 5, 8, 9, 8, 5, 2, -1, 6, -1, 5}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{1, 2, -1, 6, -1, 6, 4, 3}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MAX, asc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 4, 7, 10, 11, 10, 7, 4, 2, 8, 2, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{7, 8, 2, 4, 2, 4, 6, 5}; + this->run_all_tests(cudf::rank_method::MAX, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 2, 5, 8, 9, 8, 5, 2, 11, 6, 11, 5}; + cudf::test::fixed_width_column_wrapper struct_rank{1, 2, 8, 6, 8, 6, 4, 3}; + this->run_all_tests(cudf::rank_method::MAX, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {9, 7, 3, 1, 3, 7, 9, -1, 4, -1, 7}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{{2, 1, -1, 6, -1, 6, 3, 4}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MAX, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 11, 9, 5, 3, 5, 9, 11, 2, 6, 2, 9}; + cudf::test::fixed_width_column_wrapper struct_rank{8, 7, 2, 4, 2, 4, 5, 6}; + this->run_all_tests(cudf::rank_method::MAX, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, max_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 9, 7, 3, 1, 3, 7, 9, 11, 4, 11, 7}; + cudf::test::fixed_width_column_wrapper struct_rank{2, 1, 8, 6, 8, 6, 3, 4}; + this->run_all_tests(cudf::rank_method::MAX, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_asc_keep) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + {1.5, 4.0, 7.5, 9.0, 7.5, 4.0, 1.5, -1.0, 6.0, -1.0, 4.0}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{ + {1.0, 2.0, -1.0, 5.5, -1.0, 5.5, 4.0, 3.0}, nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::AVERAGE, asc_keep, col_rank, struct_rank); +} + 
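The AVERAGE expectations are the mean of the corresponding MIN and MAX expectations, since each member of a tie group averages the consecutive FIRST ranks of that group. A minimal host-side sketch (plain C++, independent of libcudf; the vectors are copied from the asc_keep list expectations above with the two null rows dropped) that checks this relationship:

#include <cassert>
#include <vector>

int main()
{
  std::vector<double> min_rank{1, 3, 7, 9, 7, 3, 1, 6, 3};          // MIN,     asc_keep
  std::vector<double> max_rank{2, 5, 8, 9, 8, 5, 2, 6, 5};          // MAX,     asc_keep
  std::vector<double> avg_rank{1.5, 4, 7.5, 9, 7.5, 4, 1.5, 6, 4};  // AVERAGE, asc_keep
  for (std::size_t i = 0; i < avg_rank.size(); ++i) {
    // AVERAGE rank equals the midpoint of the MIN and MAX ranks of the tie group.
    assert((min_rank[i] + max_rank[i]) / 2 == avg_rank[i]);
  }
  return 0;
}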
+TYPED_TEST(RankListAndStruct, average_asc_top) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 3.5, 6.0, 9.5, 11.0, 9.5, 6.0, 3.5, 1.5, 8.0, 1.5, 6.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 7.0, 8.0, 1.5, 3.5, 1.5, 3.5, 6.0, 5.0}; + this->run_all_tests(cudf::rank_method::AVERAGE, asc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_asc_bottom) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 1.5, 4.0, 7.5, 9.0, 7.5, 4.0, 1.5, 10.5, 6.0, 10.5, 4.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 1.0, 2.0, 7.5, 5.5, 7.5, 5.5, 4.0, 3.0}; + this->run_all_tests(cudf::rank_method::AVERAGE, asc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_desc_keep) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + {8.5, 6.0, 2.5, 1.0, 2.5, 6.0, 8.5, -1.0, 4.0, -1.0, 6.0}, nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{ + {2.0, 1.0, -1.0, 5.5, -1.0, 5.5, 3.0, 4.0}, nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::AVERAGE, desc_keep, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_desc_top) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{ + 10.5, 8.0, 4.5, 3.0, 4.5, 8.0, 10.5, 1.5, 6.0, 1.5, 8.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 8.0, 7.0, 1.5, 3.5, 1.5, 3.5, 5.0, 6.0}; + this->run_all_tests(cudf::rank_method::AVERAGE, desc_top, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, average_desc_bottom) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{ + 8.5, 6.0, 2.5, 1.0, 2.5, 6.0, 8.5, 10.5, 4.0, 10.5, 6.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 2.0, 1.0, 7.5, 5.5, 7.5, 5.5, 3.0, 4.0}; + this->run_all_tests(cudf::rank_method::AVERAGE, desc_bottom, col_rank, struct_rank); +} + +TYPED_TEST(RankListAndStruct, dense_asc_keep_pct) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{{1.0 / 5.0, + 2.0 / 5.0, + 4.0 / 5.0, + 1.0, + 4.0 / 5.0, + 2.0 / 5.0, + 1.0 / 5.0, + -1.0, + 3.0 / 5.0, + -1.0, + 2.0 / 5.0}, + nulls_at({7, 9})}; + + cudf::test::fixed_width_column_wrapper struct_rank{ + {1.0 / 5.0, 2.0 / 5.0, -1.0, 1.0, -1.0, 1.0, 4.0 / 5.0, 3.0 / 5.0}, nulls_at({2, 4})}; + + this->run_all_tests(cudf::rank_method::DENSE, asc_keep, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, dense_asc_top_pct) +{ + // ASCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{1.0 / 3.0, + 1.0 / 2.0, + 5.0 / 6.0, + 1.0, + 5.0 / 6.0, + 1.0 / 2.0, + 1.0 / 3.0, + 1.0 / 6.0, + 2.0 / 3.0, + 1.0 / 6.0, + 1.0 / 2.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 5.0 / 6.0, 1.0, 1.0 / 6.0, 2.0 / 6.0, 1.0 / 6.0, 2.0 / 6.0, 4.0 / 6.0, 3.0 / 6.0}; + this->run_all_tests(cudf::rank_method::DENSE, asc_top, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, dense_asc_bottom_pct) +{ + // ASCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{1.0 / 6.0, + 1.0 / 3.0, + 2.0 / 3.0, + 5.0 / 6.0, + 2.0 / 3.0, + 1.0 / 3.0, + 1.0 / 6.0, + 1.0, + 1.0 / 2.0, + 1.0, + 1.0 / 3.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 1.0 / 6.0, 2.0 / 6.0, 1.0, 5.0 / 6.0, 1.0, 5.0 / 6.0, 4.0 / 6.0, 3.0 / 6.0}; + this->run_all_tests(cudf::rank_method::DENSE, asc_bottom, col_rank, struct_rank, true); +} + 
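// The percentage expectations that follow are consistent with simply scaling the integer
// ranks: DENSE divides by the number of distinct groups (5 with the nulls excluded, 6 with
// them included), while the other methods divide by the number of ranked rows (9 of the 11
// list rows under null_policy::EXCLUDE, all 11 under INCLUDE). For example, the
// MIN/DESCENDING/EXCLUDE expectation {8, 5, 2, 1, 2, 5, 8, null, 4, null, 5} above becomes
// {8/9, 5/9, 2/9, 1/9, 2/9, 5/9, 8/9, null, 4/9, null, 5/9} in min_desc_keep_pct below.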
+TYPED_TEST(RankListAndStruct, min_desc_keep_pct) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{{8.0 / 9.0, + 5.0 / 9.0, + 2.0 / 9.0, + 1.0 / 9.0, + 2.0 / 9.0, + 5.0 / 9.0, + 8.0 / 9.0, + -1.0, + 4.0 / 9.0, + -1.0, + 5.0 / 9.0}, + nulls_at({7, 9})}; + cudf::test::fixed_width_column_wrapper struct_rank{ + {2.0 / 6.0, 1.0 / 6.0, -1.0, 5.0 / 6.0, -1.0, 5.0 / 6.0, 3.0 / 6.0, 4.0 / 6.0}, + nulls_at({2, 4})}; + this->run_all_tests(cudf::rank_method::MIN, desc_keep, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, min_desc_top_pct) +{ + // DESCENDING and null_order::AFTER + cudf::test::fixed_width_column_wrapper col_rank{10.0 / 11.0, + 7.0 / 11.0, + 4.0 / 11.0, + 3.0 / 11.0, + 4.0 / 11.0, + 7.0 / 11.0, + 10.0 / 11.0, + 1.0 / 11.0, + 6.0 / 11.0, + 1.0 / 11.0, + 7.0 / 11.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 1.0, 7.0 / 8.0, 1.0 / 8.0, 3.0 / 8.0, 1.0 / 8.0, 3.0 / 8.0, 5.0 / 8.0, 6.0 / 8.0}; + this->run_all_tests(cudf::rank_method::MIN, desc_top, col_rank, struct_rank, true); +} + +TYPED_TEST(RankListAndStruct, min_desc_bottom_pct) +{ + // DESCENDING and null_order::BEFORE + cudf::test::fixed_width_column_wrapper col_rank{8.0 / 11.0, + 5.0 / 11.0, + 2.0 / 11.0, + 1.0 / 11.0, + 2.0 / 11.0, + 5.0 / 11.0, + 8.0 / 11.0, + 10.0 / 11.0, + 4.0 / 11.0, + 10.0 / 11.0, + 5.0 / 11.0}; + cudf::test::fixed_width_column_wrapper struct_rank{ + 2.0 / 8.0, 1.0 / 8.0, 7.0 / 8.0, 5.0 / 8.0, 7.0 / 8.0, 5.0 / 8.0, 3.0 / 8.0, 4.0 / 8.0}; + this->run_all_tests(cudf::rank_method::MIN, desc_bottom, col_rank, struct_rank, true); +} From fea6288f5f114790fd7d075a6b6c26b2f78a8316 Mon Sep 17 00:00:00 2001 From: Cindy Jiang <47068112+cindyyuanjiang@users.noreply.github.com> Date: Tue, 7 Feb 2023 17:15:28 -0800 Subject: [PATCH 02/10] Add `regex_program` strings splitting java APIs and tests (#12713) This PR adds [split_re, rsplit_re, split_record_re, rsplit_record_re](https://docs.rapids.ai/api/libcudf/nightly/split__re_8hpp.html) related `regex_program` java APIs and unit tests. Part of work for https://github.com/NVIDIA/spark-rapids/issues/7295. Authors: - Cindy Jiang (https://github.com/cindyyuanjiang) Approvers: - Jason Lowe (https://github.com/jlowe) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12713 --- .../main/java/ai/rapids/cudf/ColumnView.java | 152 +++++++++++++++--- java/src/main/native/src/ColumnViewJni.cpp | 98 +++++++---- .../java/ai/rapids/cudf/ColumnVectorTest.java | 60 ++++--- 3 files changed, 234 insertions(+), 76 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 4daa3c17cfc..2d0bf28225f 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2531,12 +2531,34 @@ public final ColumnVector stringLocate(Scalar substring, int start, int end) { * regular expression pattern or just by a string literal delimiter. * @return list of strings columns as a table. */ + @Deprecated public final Table stringSplit(String pattern, int limit, boolean splitByRegex) { + if (splitByRegex) { + return stringSplit(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), limit); + } else { + return stringSplit(pattern, limit); + } + } + + /** + * Returns a list of columns by splitting each string using the specified regex program pattern. + * The number of rows in the output columns will be the same as the input column. 
Null entries + * are added for the rows where split results have been exhausted. Null input entries result in + * all nulls in the corresponding rows of the output columns. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + * @return list of strings columns as a table. + */ + public final Table stringSplit(RegexProgram regexProg, int limit) { assert type.equals(DType.STRING) : "column type must be a String"; - assert pattern != null : "pattern is null"; - assert pattern.length() > 0 : "empty pattern is not supported"; + assert regexProg != null : "regex program is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; - return new Table(stringSplit(this.getNativeView(), pattern, limit, splitByRegex)); + return new Table(stringSplitRe(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, limit)); } /** @@ -2550,6 +2572,7 @@ public final Table stringSplit(String pattern, int limit, boolean splitByRegex) * regular expression pattern or just by a string literal delimiter. * @return list of strings columns as a table. */ + @Deprecated public final Table stringSplit(String pattern, boolean splitByRegex) { return stringSplit(pattern, -1, splitByRegex); } @@ -2567,7 +2590,10 @@ public final Table stringSplit(String pattern, boolean splitByRegex) { * @return list of strings columns as a table. */ public final Table stringSplit(String delimiter, int limit) { - return stringSplit(delimiter, limit, false); + assert type.equals(DType.STRING) : "column type must be a String"; + assert delimiter != null : "delimiter is null"; + assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; + return new Table(stringSplit(this.getNativeView(), delimiter, limit)); } /** @@ -2580,7 +2606,21 @@ public final Table stringSplit(String delimiter, int limit) { * @return list of strings columns as a table. */ public final Table stringSplit(String delimiter) { - return stringSplit(delimiter, -1, false); + return stringSplit(delimiter, -1); + } + + /** + * Returns a list of columns by splitting each string using the specified regex program pattern. + * The number of rows in the output columns will be the same as the input column. Null entries + * are added for the rows where split results have been exhausted. Null input entries result in + * all nulls in the corresponding rows of the output columns. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @return list of strings columns as a table. + */ + public final Table stringSplit(RegexProgram regexProg) { + return stringSplit(regexProg, -1); } /** @@ -2595,13 +2635,33 @@ public final Table stringSplit(String delimiter) { * regular expression pattern or just by a string literal delimiter. * @return a LIST column of string elements. 
*/ + @Deprecated public final ColumnVector stringSplitRecord(String pattern, int limit, boolean splitByRegex) { + if (splitByRegex) { + return stringSplitRecord(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), limit); + } else { + return stringSplitRecord(pattern, limit); + } + } + + /** + * Returns a column that are lists of strings in which each list is made by splitting the + * corresponding input string using the specified regex program pattern. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + * @return a LIST column of string elements. + */ + public final ColumnVector stringSplitRecord(RegexProgram regexProg, int limit) { assert type.equals(DType.STRING) : "column type must be String"; - assert pattern != null : "pattern is null"; - assert pattern.length() > 0 : "empty pattern is not supported"; + assert regexProg != null : "regex program is null"; assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; return new ColumnVector( - stringSplitRecord(this.getNativeView(), pattern, limit, splitByRegex)); + stringSplitRecordRe(this.getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, limit)); } /** @@ -2613,6 +2673,7 @@ public final ColumnVector stringSplitRecord(String pattern, int limit, boolean s * regular expression pattern or just by a string literal delimiter. * @return a LIST column of string elements. */ + @Deprecated public final ColumnVector stringSplitRecord(String pattern, boolean splitByRegex) { return stringSplitRecord(pattern, -1, splitByRegex); } @@ -2628,7 +2689,10 @@ public final ColumnVector stringSplitRecord(String pattern, boolean splitByRegex * @return a LIST column of string elements. */ public final ColumnVector stringSplitRecord(String delimiter, int limit) { - return stringSplitRecord(delimiter, limit, false); + assert type.equals(DType.STRING) : "column type must be String"; + assert delimiter != null : "delimiter is null"; + assert limit != 0 && limit != 1 : "split limit == 0 and limit == 1 are not supported"; + return new ColumnVector(stringSplitRecord(this.getNativeView(), delimiter, limit)); } /** @@ -2639,7 +2703,19 @@ public final ColumnVector stringSplitRecord(String delimiter, int limit) { * @return a LIST column of string elements. */ public final ColumnVector stringSplitRecord(String delimiter) { - return stringSplitRecord(delimiter, -1, false); + return stringSplitRecord(delimiter, -1); + } + + /** + * Returns a column that are lists of strings in which each list is made by splitting the + * corresponding input string using the specified regex program pattern. + * + * @param regexProg the regex program with UTF-8 encoded string identifying the split pattern + * for each input string. + * @return a LIST column of string elements. + */ + public final ColumnVector stringSplitRecord(RegexProgram regexProg) { + return stringSplitRecord(regexProg, -1); } /** @@ -3958,36 +4034,64 @@ private static native long repeatStringsWithColumnRepeatTimes(long stringsHandle private static native long substringLocate(long columnView, long substringScalar, int start, int end); /** - * Returns a list of columns by splitting each string using the specified pattern. 
The number of - * rows in the output columns will be the same as the input column. Null entries are added for a - * row where split results have been exhausted. Null input entries result in all nulls in the - * corresponding rows of the output columns. + * Returns a list of columns by splitting each string using the specified string literal + * delimiter. The number of rows in the output columns will be the same as the input column. + * Null entries are added for the rows where split results have been exhausted. Null input entries + * result in all nulls in the corresponding rows of the output columns. * * @param nativeHandle native handle of the input strings column that being operated on. - * @param pattern UTF-8 encoded string identifying the split pattern for each input string. + * @param delimiter UTF-8 encoded string identifying the split delimiter for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + */ + private static native long[] stringSplit(long nativeHandle, String delimiter, int limit); + + /** + * Returns a list of columns by splitting each string using the specified regular expression + * pattern. The number of rows in the output columns will be the same as the input column. + * Null entries are added for the rows where split results have been exhausted. Null input entries + * result in all nulls in the corresponding rows of the output columns. + * + * @param nativeHandle native handle of the input strings column that being operated on. + * @param pattern UTF-8 encoded string identifying the split regular expression pattern for + * each input string. + * @param flags regex flags setting. + * @param capture capture groups setting. * @param limit the maximum size of the list resulting from splitting each input string, * or -1 for all possible splits. Note that limit = 0 (all possible splits without * trailing empty strings) and limit = 1 (no split at all) are not supported. - * @param splitByRegex a boolean flag indicating whether the input strings will be split by a - * regular expression pattern or just by a string literal delimiter. */ - private static native long[] stringSplit(long nativeHandle, String pattern, int limit, - boolean splitByRegex); + private static native long[] stringSplitRe(long nativeHandle, String pattern, int flags, + int capture, int limit); /** * Returns a column that are lists of strings in which each list is made by splitting the * corresponding input string using the specified string literal delimiter. * * @param nativeHandle native handle of the input strings column that being operated on. - * @param pattern UTF-8 encoded string identifying the split pattern for each input string. + * @param delimiter UTF-8 encoded string identifying the split delimiter for each input string. + * @param limit the maximum size of the list resulting from splitting each input string, + * or -1 for all possible splits. Note that limit = 0 (all possible splits without + * trailing empty strings) and limit = 1 (no split at all) are not supported. + */ + private static native long stringSplitRecord(long nativeHandle, String delimiter, int limit); + + /** + * Returns a column that are lists of strings in which each list is made by splitting the + * corresponding input string using the specified regular expression pattern. 
+ * + * @param nativeHandle native handle of the input strings column that being operated on. + * @param pattern UTF-8 encoded string identifying the split regular expression pattern for + * each input string. + * @param flags regex flags setting. + * @param capture capture groups setting. * @param limit the maximum size of the list resulting from splitting each input string, * or -1 for all possible splits. Note that limit = 0 (all possible splits without * trailing empty strings) and limit = 1 (no split at all) are not supported. - * @param splitByRegex a boolean flag indicating whether the input strings will be split by a - * regular expression pattern or just by a string literal delimiter. */ - private static native long stringSplitRecord(long nativeHandle, String pattern, int limit, - boolean splitByRegex); + private static native long stringSplitRecordRe(long nativeHandle, String pattern, int flags, + int capture, int limit); /** * Native method to calculate substring from a given string column. 0 indexing. diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index bfa3fa0a522..958efd364ed 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -681,9 +681,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_reverseStringsOrLists(JNI JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv *env, jclass, jlong input_handle, - jstring pattern_obj, - jint limit, - jboolean split_by_regex) { + jstring delimiter_obj, + jint limit) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -697,21 +696,42 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv * try { cudf::jni::auto_set_device(env); - auto const input = reinterpret_cast(input_handle); - auto const strs_input = cudf::strings_column_view{*input}; + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj); + auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes()); + auto const max_split = limit > 1 ? limit - 1 : limit; + auto result = cudf::strings::split(strings_column, cudf::string_scalar{delimiter}, max_split); + return cudf::jni::convert_table_for_return(env, std::move(result)); + } + CATCH_STD(env, 0); +} - auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); - if (pattern_jstr.is_empty()) { - // Java's split API produces different behaviors than cudf when splitting with empty - // pattern. - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Empty pattern is not supported", 0); - } +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRe( + JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags, + jint capture_groups, jint limit) { + JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); + if (limit == 0 || limit == 1) { + // Cannot achieve the results of splitting with limit == 0 or limit == 1. + // This is because cudf operates on a different parameter (`max_split`) which is converted from + // limit. When limit == 0 or limit == 1, max_split will be non-positive and will result in an + // unlimited split. 
+ JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", + "limit == 0 and limit == 1 are not supported", 0); + } + + try { + cudf::jni::auto_set_device(env); + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? limit - 1 : limit; - auto result = split_by_regex ? - cudf::strings::split_re(strs_input, pattern, max_split) : - cudf::strings::split(strs_input, cudf::string_scalar{pattern}, max_split); + auto const flags = static_cast(regex_flags); + auto const groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); + auto result = cudf::strings::split_re(strings_column, *regex_prog, max_split); return cudf::jni::convert_table_for_return(env, std::move(result)); } CATCH_STD(env, 0); @@ -719,9 +739,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringSplit(JNIEnv * JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv *env, jclass, jlong input_handle, - jstring pattern_obj, - jint limit, - jboolean split_by_regex) { + jstring delimiter_obj, + jint limit) { JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); if (limit == 0 || limit == 1) { @@ -735,22 +754,43 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecord(JNIEnv try { cudf::jni::auto_set_device(env); - auto const input = reinterpret_cast(input_handle); - auto const strs_input = cudf::strings_column_view{*input}; + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const delimiter_jstr = cudf::jni::native_jstring(env, delimiter_obj); + auto const delimiter = std::string(delimiter_jstr.get(), delimiter_jstr.size_bytes()); + auto const max_split = limit > 1 ? limit - 1 : limit; + auto result = + cudf::strings::split_record(strings_column, cudf::string_scalar{delimiter}, max_split); + return release_as_jlong(result); + } + CATCH_STD(env, 0); +} - auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); - if (pattern_jstr.is_empty()) { - // Java's split API produces different behaviors than cudf when splitting with empty - // pattern. - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Empty pattern is not supported", 0); - } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringSplitRecordRe( + JNIEnv *env, jclass, jlong input_handle, jstring pattern_obj, jint regex_flags, + jint capture_groups, jint limit) { + JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); + if (limit == 0 || limit == 1) { + // Cannot achieve the results of splitting with limit == 0 or limit == 1. + // This is because cudf operates on a different parameter (`max_split`) which is converted from + // limit. When limit == 0 or limit == 1, max_split will be non-positive and will result in an + // unlimited split. + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", + "limit == 0 and limit == 1 are not supported", 0); + } + + try { + cudf::jni::auto_set_device(env); + auto const input = reinterpret_cast(input_handle); + auto const strings_column = cudf::strings_column_view{*input}; + auto const pattern_jstr = cudf::jni::native_jstring(env, pattern_obj); auto const pattern = std::string(pattern_jstr.get(), pattern_jstr.size_bytes()); auto const max_split = limit > 1 ? 
limit - 1 : limit; - auto result = - split_by_regex ? - cudf::strings::split_record_re(strs_input, pattern, max_split) : - cudf::strings::split_record(strs_input, cudf::string_scalar{pattern}, max_split); + auto const flags = static_cast(regex_flags); + auto const groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern, flags, groups); + auto result = cudf::strings::split_record_re(strings_column, *regex_prog, max_split); return release_as_jlong(result); } CATCH_STD(env, 0); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 46264b7d668..ab4baf74277 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4990,28 +4990,29 @@ void testReverseList() { void testStringSplit() { String pattern = " "; try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", - "ARé some things", "test strings here"); + "ARé some things", "test strings here"); Table expectedSplitLimit2 = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there all", null, null, null, "some things", "strings here") - .build(); + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there all", null, null, null, "some things", "strings here") + .build(); Table expectedSplitAll = new Table.TestBuilder() - .column("Héllo", "thésé", null, "", "ARé", "test") - .column("there", null, null, null, "some", "strings") - .column("all", null, null, null, "things", "here") - .build(); + .column("Héllo", "thésé", null, "", "ARé", "test") + .column("there", null, null, null, "some", "strings") + .column("all", null, null, null, "things", "here") + .build(); Table resultSplitLimit2 = v.stringSplit(pattern, 2); Table resultSplitAll = v.stringSplit(pattern)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); } } @Test void testStringSplitByRegularExpression() { String pattern = "[_ ]"; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", - "ARé some_things", "test_strings_here"); + "ARé some_things", "test_strings_here"); Table expectedSplitLimit2 = new Table.TestBuilder() .column("Héllo", "thésé", null, "", "ARé", "test") .column("there all", null, null, null, "some_things", "strings_here") @@ -5020,11 +5021,17 @@ void testStringSplitByRegularExpression() { .column("Héllo", "thésé", null, "", "ARé", "test") .column("there", null, null, null, "some", "strings") .column("all", null, null, null, "things", "here") - .build(); - Table resultSplitLimit2 = v.stringSplit(pattern, 2, true); - Table resultSplitAll = v.stringSplit(pattern, true)) { - assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertTablesAreEqual(expectedSplitAll, resultSplitAll); + .build()) { + try (Table resultSplitLimit2 = v.stringSplit(pattern, 2, true); + Table resultSplitAll = v.stringSplit(pattern, true)) { + assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } + try (Table resultSplitLimit2 = v.stringSplit(regexProg, 2); + Table resultSplitAll = v.stringSplit(regexProg)) { + 
assertTablesAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertTablesAreEqual(expectedSplitAll, resultSplitAll); + } } } @@ -5032,7 +5039,7 @@ void testStringSplitByRegularExpression() { void testStringSplitRecord() { String pattern = " "; try (ColumnVector v = ColumnVector.fromStrings("Héllo there all", "thésé", null, "", - "ARé some things", "test strings here"); + "ARé some things", "test strings here"); ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( new HostColumnVector.ListType(true, new HostColumnVector.BasicType(true, DType.STRING)), @@ -5061,8 +5068,9 @@ void testStringSplitRecord() { @Test void testStringSplitRecordByRegularExpression() { String pattern = "[_ ]"; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); try (ColumnVector v = ColumnVector.fromStrings("Héllo_there all", "thésé", null, "", - "ARé some_things", "test_strings_here"); + "ARé some_things", "test_strings_here"); ColumnVector expectedSplitLimit2 = ColumnVector.fromLists( new HostColumnVector.ListType(true, new HostColumnVector.BasicType(true, DType.STRING)), @@ -5080,11 +5088,17 @@ void testStringSplitRecordByRegularExpression() { null, Arrays.asList(""), Arrays.asList("ARé", "some", "things"), - Arrays.asList("test", "strings", "here")); - ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true); - ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) { - assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); - assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + Arrays.asList("test", "strings", "here"))) { + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(pattern, 2, true); + ColumnVector resultSplitAll = v.stringSplitRecord(pattern, true)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } + try (ColumnVector resultSplitLimit2 = v.stringSplitRecord(regexProg, 2); + ColumnVector resultSplitAll = v.stringSplitRecord(regexProg)) { + assertColumnsAreEqual(expectedSplitLimit2, resultSplitLimit2); + assertColumnsAreEqual(expectedSplitAll, resultSplitAll); + } } } From b8ae0e4b41c541c5b2b27417af30fa1b9afcbdce Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Tue, 7 Feb 2023 23:33:56 -0600 Subject: [PATCH 03/10] Support conversion to/from cudf in dask.dataframe.core.to_backend (#12380) This PR corresponds to the `cudf` component of https://github.com/dask/dask/pull/9758 Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/12380 --- python/dask_cudf/dask_cudf/backends.py | 194 +++++++++++------- python/dask_cudf/dask_cudf/tests/test_core.py | 55 ++++- 2 files changed, 170 insertions(+), 79 deletions(-) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index b6be5ade6ba..821ec103204 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -11,6 +11,10 @@ import dask.dataframe as dd from dask import config +from dask.dataframe.backends import ( + DataFrameBackendEntrypoint, + PandasBackendEntrypoint, +) from dask.dataframe.core import get_parallel_type, meta_nonempty from dask.dataframe.dispatch import ( categorical_dtype_dispatch, @@ -30,7 +34,7 @@ make_meta_obj, ) from dask.sizeof import sizeof as sizeof_dispatch -from dask.utils import 
is_arraylike +from dask.utils import Dispatch, is_arraylike import cudf from cudf.api.types import is_string_dtype @@ -446,91 +450,127 @@ def _default_backend(func, *args, **kwargs): return func(*args, **kwargs) -try: +def _unsupported_kwargs(old, new, kwargs): + # Utility to raise a meaningful error when + # unsupported kwargs are encountered within + # ``to_backend_dispatch`` + if kwargs: + raise ValueError( + f"Unsupported key-word arguments used in `to_backend` " + f"for {old}-to-{new} conversion: {kwargs}" + ) - # Define "cudf" backend engine to be registered with Dask - from dask.dataframe.backends import DataFrameBackendEntrypoint - - class CudfBackendEntrypoint(DataFrameBackendEntrypoint): - """Backend-entrypoint class for Dask-DataFrame - - This class is registered under the name "cudf" for the - ``dask.dataframe.backends`` entrypoint in ``setup.cfg``. - Dask-DataFrame will use the methods defined in this class - in place of ``dask.dataframe.`` when the - "dataframe.backend" configuration is set to "cudf": - - Examples - -------- - >>> import dask - >>> import dask.dataframe as dd - >>> with dask.config.set({"dataframe.backend": "cudf"}): - ... ddf = dd.from_dict({"a": range(10)}) - >>> type(ddf) - - """ - - @staticmethod - def from_dict( - data, - npartitions, - orient="columns", - dtype=None, - columns=None, - constructor=cudf.DataFrame, - ): - - return _default_backend( - dd.from_dict, - data, - npartitions=npartitions, - orient=orient, - dtype=dtype, - columns=columns, - constructor=constructor, - ) - @staticmethod - def read_parquet(*args, engine=None, **kwargs): - from dask_cudf.io.parquet import CudfEngine +# Register cudf->pandas +to_pandas_dispatch = PandasBackendEntrypoint.to_backend_dispatch() - return _default_backend( - dd.read_parquet, - *args, - engine=CudfEngine, - **kwargs, - ) - @staticmethod - def read_json(*args, **kwargs): - from dask_cudf.io.json import read_json +@to_pandas_dispatch.register((cudf.DataFrame, cudf.Series, cudf.Index)) +def to_pandas_dispatch_from_cudf(data, nullable=False, **kwargs): + _unsupported_kwargs("cudf", "pandas", kwargs) + return data.to_pandas(nullable=nullable) - return read_json(*args, **kwargs) - @staticmethod - def read_orc(*args, **kwargs): - from dask_cudf.io import read_orc +# Register pandas->cudf +to_cudf_dispatch = Dispatch("to_cudf_dispatch") - return read_orc(*args, **kwargs) - @staticmethod - def read_csv(*args, **kwargs): - from dask_cudf.io import read_csv +@to_cudf_dispatch.register((pd.DataFrame, pd.Series, pd.Index)) +def to_cudf_dispatch_from_pandas(data, nan_as_null=None, **kwargs): + _unsupported_kwargs("pandas", "cudf", kwargs) + return cudf.from_pandas(data, nan_as_null=nan_as_null) - return read_csv(*args, **kwargs) - @staticmethod - def read_hdf(*args, **kwargs): - from dask_cudf import from_dask_dataframe +# Define "cudf" backend engine to be registered with Dask +class CudfBackendEntrypoint(DataFrameBackendEntrypoint): + """Backend-entrypoint class for Dask-DataFrame - # HDF5 reader not yet implemented in cudf - warnings.warn( - "read_hdf is not yet implemented in cudf/dask_cudf. " - "Moving to cudf from pandas. Expect poor performance!" - ) - return from_dask_dataframe( - _default_backend(dd.read_hdf, *args, **kwargs) - ) + This class is registered under the name "cudf" for the + ``dask.dataframe.backends`` entrypoint in ``setup.cfg``. 
+ Dask-DataFrame will use the methods defined in this class + in place of ``dask.dataframe.`` when the + "dataframe.backend" configuration is set to "cudf": -except ImportError: - pass + Examples + -------- + >>> import dask + >>> import dask.dataframe as dd + >>> with dask.config.set({"dataframe.backend": "cudf"}): + ... ddf = dd.from_dict({"a": range(10)}) + >>> type(ddf) + + """ + + @classmethod + def to_backend_dispatch(cls): + return to_cudf_dispatch + + @classmethod + def to_backend(cls, data: dd.core._Frame, **kwargs): + if isinstance(data._meta, (cudf.DataFrame, cudf.Series, cudf.Index)): + # Already a cudf-backed collection + _unsupported_kwargs("cudf", "cudf", kwargs) + return data + return data.map_partitions(cls.to_backend_dispatch(), **kwargs) + + @staticmethod + def from_dict( + data, + npartitions, + orient="columns", + dtype=None, + columns=None, + constructor=cudf.DataFrame, + ): + + return _default_backend( + dd.from_dict, + data, + npartitions=npartitions, + orient=orient, + dtype=dtype, + columns=columns, + constructor=constructor, + ) + + @staticmethod + def read_parquet(*args, engine=None, **kwargs): + from dask_cudf.io.parquet import CudfEngine + + return _default_backend( + dd.read_parquet, + *args, + engine=CudfEngine, + **kwargs, + ) + + @staticmethod + def read_json(*args, **kwargs): + from dask_cudf.io.json import read_json + + return read_json(*args, **kwargs) + + @staticmethod + def read_orc(*args, **kwargs): + from dask_cudf.io import read_orc + + return read_orc(*args, **kwargs) + + @staticmethod + def read_csv(*args, **kwargs): + from dask_cudf.io import read_csv + + return read_csv(*args, **kwargs) + + @staticmethod + def read_hdf(*args, **kwargs): + from dask_cudf import from_dask_dataframe + + # HDF5 reader not yet implemented in cudf + warnings.warn( + "read_hdf is not yet implemented in cudf/dask_cudf. " + "Moving to cudf from pandas. Expect poor performance!" 
+ ) + return from_dask_dataframe( + _default_backend(dd.read_hdf, *args, **kwargs) + ) diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index ee8229bc7e8..7f8876c8564 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd import pytest +from packaging import version import dask from dask import dataframe as dd @@ -31,6 +32,58 @@ def test_from_dict_backend_dispatch(): dd.assert_eq(expect, ddf) +def test_to_backend(): + np.random.seed(0) + data = { + "x": np.random.randint(0, 5, size=10000), + "y": np.random.normal(size=10000), + } + with dask.config.set({"dataframe.backend": "pandas"}): + ddf = dd.from_dict(data, npartitions=2) + assert isinstance(ddf._meta, pd.DataFrame) + + gdf = ddf.to_backend("cudf") + assert isinstance(gdf, dgd.DataFrame) + dd.assert_eq(cudf.DataFrame(data), ddf) + + assert isinstance(gdf.to_backend()._meta, pd.DataFrame) + + +def test_to_backend_kwargs(): + data = {"x": [0, 2, np.nan, 3, 4, 5]} + with dask.config.set({"dataframe.backend": "pandas"}): + dser = dd.from_dict(data, npartitions=2)["x"] + assert isinstance(dser._meta, pd.Series) + + # Using `nan_as_null=False` will result in a cudf-backed + # Series with a NaN element (ranther than ) + gser_nan = dser.to_backend("cudf", nan_as_null=False) + assert isinstance(gser_nan, dgd.Series) + assert np.isnan(gser_nan.compute()).sum() == 1 + + # Using `nan_as_null=True` will result in a cudf-backed + # Series with a element (ranther than NaN) + gser_null = dser.to_backend("cudf", nan_as_null=True) + assert isinstance(gser_null, dgd.Series) + assert np.isnan(gser_null.compute()).sum() == 0 + + # Check `nullable` argument for `cudf.Series.to_pandas` + dser_null = gser_null.to_backend("pandas", nullable=False) + assert dser_null.compute().dtype == "float" + dser_null = gser_null.to_backend("pandas", nullable=True) + assert isinstance(dser_null.compute().dtype, pd.Float64Dtype) + + # Check unsupported arguments + with pytest.raises(ValueError, match="pandas-to-cudf"): + dser.to_backend("cudf", bad_arg=True) + + with pytest.raises(ValueError, match="cudf-to-cudf"): + gser_null.to_backend("cudf", bad_arg=True) + + with pytest.raises(ValueError, match="cudf-to-pandas"): + gser_null.to_backend("pandas", bad_arg=True) + + def test_from_cudf(): np.random.seed(0) @@ -547,8 +600,6 @@ def test_unary_ops(func, gdf, gddf): # Fixed in https://github.com/dask/dask/pull/4657 if isinstance(p, cudf.Index): - from packaging import version - if version.parse(dask.__version__) < version.parse("1.1.6"): pytest.skip( "dask.dataframe assert_eq index check hardcoded to " From 8ad4166c7026482a53a60f47b56dd5e1dec1a463 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 8 Feb 2023 10:39:42 -0500 Subject: [PATCH 04/10] Remove cudf::strings::repeat_strings_output_sizes and optional parameter from cudf::strings::repeat_strings (#12609) Removes `cudf::strings::repeat_strings_output_sizes` and the optional sizes parameter from `cudf::strings::repeat_strings`. This function (and corresponding optional parameter) is no longer needed now that the internal utilities will throw an error if the column output size exceeds the maximum. 
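For reference, a minimal sketch (not part of this patch) of how callers now use the simplified API; the function name `repeat_strings_example` and the literal values are illustrative only:

```cpp
#include <cudf/strings/repeat_strings.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include <cudf_test/column_wrapper.hpp>

void repeat_strings_example()
{
  // Per-row repeat counts paired with the strings to repeat
  cudf::test::strings_column_wrapper strs({"aa", "", "bbc-"});
  cudf::test::fixed_width_column_wrapper<int32_t> repeat_times({1, 3, 2});

  // No separate output-size pre-pass anymore: an output that would exceed
  // the size_type limit now throws cudf::logic_error from inside the call.
  auto result =
    cudf::strings::repeat_strings(cudf::strings_column_view{strs}, repeat_times);
  // result holds ['aa', '', 'bbc-bbc-']
}
```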
Closes #12542 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Karthikeyan (https://github.com/karthikeyann) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12609 --- cpp/benchmarks/string/repeat_strings.cpp | 56 +----- cpp/include/cudf/strings/repeat_strings.hpp | 94 +++------- cpp/src/strings/repeat_strings.cu | 194 +++----------------- cpp/tests/strings/repeat_strings_tests.cpp | 121 +----------- 4 files changed, 56 insertions(+), 409 deletions(-) diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp index 1844e93bc53..fe015b27f13 100644 --- a/cpp/benchmarks/string/repeat_strings.cpp +++ b/cpp/benchmarks/string/repeat_strings.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -79,42 +79,6 @@ static void BM_repeat_strings_column_times(benchmark::State& state) (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t))); } -static void BM_compute_output_strings_sizes(benchmark::State& state) -{ - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); - auto const table = create_data_table(2, n_rows, max_str_length); - auto const strings_col = cudf::strings_column_view(table->view().column(0)); - auto const repeat_times_col = table->view().column(1); - - for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::strings::repeat_strings_output_sizes(strings_col, repeat_times_col); - } - - state.SetBytesProcessed(state.iterations() * - (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t))); -} - -static void BM_repeat_strings_column_times_precomputed_sizes(benchmark::State& state) -{ - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); - auto const table = create_data_table(2, n_rows, max_str_length); - auto const strings_col = cudf::strings_column_view(table->view().column(0)); - auto const repeat_times_col = table->view().column(1); - [[maybe_unused]] auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strings_col, repeat_times_col); - - for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::strings::repeat_strings(strings_col, repeat_times_col, *sizes); - } - - state.SetBytesProcessed(state.iterations() * - (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t))); -} - static void generate_bench_args(benchmark::internal::Benchmark* b) { int const min_rows = 1 << 8; @@ -145,23 +109,5 @@ class RepeatStrings : public cudf::benchmark { ->UseManualTime() \ ->Unit(benchmark::kMillisecond); -#define COMPUTE_OUTPUT_STRINGS_SIZES_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(RepeatStrings, name) \ - (::benchmark::State & st) { BM_compute_output_strings_sizes(st); } \ - BENCHMARK_REGISTER_F(RepeatStrings, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -#define REPEAT_STRINGS_COLUMN_TIMES_PRECOMPUTED_SIZES_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(RepeatStrings, name) \ - (::benchmark::State & st) { BM_repeat_strings_column_times_precomputed_sizes(st); } \ - 
BENCHMARK_REGISTER_F(RepeatStrings, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(scalar_times) REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(column_times) -COMPUTE_OUTPUT_STRINGS_SIZES_BENCHMARK_DEFINE(compute_output_strings_sizes) -REPEAT_STRINGS_COLUMN_TIMES_PRECOMPUTED_SIZES_BENCHMARK_DEFINE(precomputed_sizes) diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp index 0e6ee2126d3..26fe5f95983 100644 --- a/cpp/include/cudf/strings/repeat_strings.hpp +++ b/cpp/include/cudf/strings/repeat_strings.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,15 +32,15 @@ namespace strings { */ /** - * @brief Repeat the given string scalar by a given number of times. + * @brief Repeat the given string scalar a given number of times * * An output string scalar is generated by repeating the input string by a number of times given by - * the @p `repeat_times` parameter. + * the `repeat_times` parameter. * * In special cases: - * - If @p `repeat_times` is not a positive value, an empty (valid) string scalar will be returned. + * - If `repeat_times` is not a positive value, an empty (valid) string scalar will be returned. * - An invalid input scalar will always result in an invalid output scalar regardless of the - * value of @p `repeat_times` parameter. + * value of `repeat_times` parameter. * * @code{.pseudo} * Example: @@ -50,13 +50,13 @@ namespace strings { * @endcode * * @throw cudf::logic_error if the size of the output string scalar exceeds the maximum value that - * can be stored by the index type - * (i.e., @code input.size() * repeat_times > numeric_limits::max() @endcode). + * can be stored by the index type: + * `input.size() * repeat_times > max of size_type` * - * @param input The scalar containing the string to repeat. - * @param repeat_times The number of times the input string is repeated. - * @param mr Device memory resource used to allocate the returned string scalar. - * @return New string scalar in which the input string is repeated. + * @param input The scalar containing the string to repeat + * @param repeat_times The number of times the input string is repeated + * @param mr Device memory resource used to allocate the returned string scalar + * @return New string scalar in which the input string is repeated */ std::unique_ptr repeat_string( string_scalar const& input, @@ -64,19 +64,16 @@ std::unique_ptr repeat_string( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Repeat each string in the given strings column by a given number of times. + * @brief Repeat each string in the given strings column a given number of times * - * An output strings column is generated by repeating each string from the input strings column by a - * number of times given by the @p `repeat_times` parameter. + * An output strings column is generated by repeating each string from the input strings column by + * the number of times given by the `repeat_times` parameter. * * In special cases: - * - If @p `repeat_times` is not a positive number, a non-null input string will always result in + * - If `repeat_times` is not a positive number, a non-null input string will always result in * an empty output string. 
* - A null input string will always result in a null output string regardless of the value of the - * @p `repeat_times` parameter. - * - * The caller is responsible for checking the output column size will not exceed the maximum size of - * a strings column (number of total characters is less than the max size_type value). + * `repeat_times` parameter. * * @code{.pseudo} * Example: @@ -85,10 +82,10 @@ std::unique_ptr repeat_string( * out is ['aaaaaa', null, '', 'bbcbbcbbc'] * @endcode * - * @param input The column containing strings to repeat. - * @param repeat_times The number of times each input string is repeated. - * @param mr Device memory resource used to allocate the returned strings column. - * @return New column containing the repeated strings. + * @param input The column containing strings to repeat + * @param repeat_times The number of times each input string is repeated + * @param mr Device memory resource used to allocate the returned strings column + * @return New column containing the repeated strings */ std::unique_ptr repeat_strings( strings_column_view const& input, @@ -97,11 +94,10 @@ std::unique_ptr repeat_strings( /** * @brief Repeat each string in the given strings column by the numbers of times given in another - * numeric column. + * numeric column * * An output strings column is generated by repeating each of the input string by a number of times - * given by the corresponding row in a @p `repeat_times` numeric column. The computational time can - * be reduced if sizes of the output strings are known and provided. + * given by the corresponding row in a `repeat_times` numeric column. * * In special cases: * - Any null row (from either the input strings column or the `repeat_times` column) will always @@ -109,9 +105,6 @@ std::unique_ptr repeat_strings( * - If any value in the `repeat_times` column is not a positive number and its corresponding input * string is not null, the output string will be an empty string. * - * The caller is responsible for checking the output column size will not exceed the maximum size of - * a strings column (number of total characters is less than the max size_type value). - * * @code{.pseudo} * Example: * strs = ['aa', null, '', 'bbc-'] @@ -120,51 +113,16 @@ std::unique_ptr repeat_strings( * out is ['aa', null, '', 'bbc-bbc-bbc-bbc-'] * @endcode * - * @throw cudf::logic_error if the input `repeat_times` column has data type other than integer. + * @throw cudf::logic_error if the input `repeat_times` is not an integer type * @throw cudf::logic_error if the input columns have different sizes. * - * @param input The column containing strings to repeat. + * @param input The column containing strings to repeat * @param repeat_times The column containing numbers of times that the corresponding input strings - * are repeated. - * @param output_strings_sizes The optional column containing pre-computed sizes of the output - * strings. - * @param mr Device memory resource used to allocate the returned strings column. + * are repeated + * @param mr Device memory resource used to allocate the returned strings column * @return New column containing the repeated strings. 
*/ std::unique_ptr repeat_strings( - strings_column_view const& input, - column_view const& repeat_times, - std::optional output_strings_sizes = std::nullopt, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Compute sizes of the output strings if each string in the input strings column - * is repeated by the numbers of times given in another numeric column. - * - * The output column storing string output sizes is not nullable. These string sizes are - * also summed up and returned (in an `int64_t` value), which can be used to detect if the input - * strings column can be safely repeated without data corruption due to overflow in string indexing. - * - * @code{.pseudo} - * Example: - * strs = ['aa', null, '', 'bbc-'] - * repeat_times = [ 1, 2, 3, 4 ] - * [output_sizes, total_size] = repeat_strings_output_sizes(strs, repeat_times) - * out is [2, 0, 0, 16], and total_size = 18 - * @endcode - * - * @throw cudf::logic_error if the input `repeat_times` column has data type other than integer. - * @throw cudf::logic_error if the input columns have different sizes. - * - * @param input The column containing strings to repeat. - * @param repeat_times The column containing numbers of times that the corresponding input strings - * are repeated. - * @param mr Device memory resource used to allocate the returned strings column. - * @return A pair with the first item is an int32_t column containing sizes of the output strings, - * and the second item is an int64_t number containing the total sizes (in bytes) of the - * output strings column. - */ -std::pair, int64_t> repeat_strings_output_sizes( strings_column_view const& input, column_view const& repeat_times, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu index cc283fbcee2..3784b535a5b 100644 --- a/cpp/src/strings/repeat_strings.cu +++ b/cpp/src/strings/repeat_strings.cu @@ -176,7 +176,7 @@ namespace { * separate number of times. */ template -struct compute_size_and_repeat_separately_fn { +struct compute_sizes_and_repeat_fn { column_device_view const strings_dv; column_device_view const repeat_times_dv; Iterator const repeat_times_iter; @@ -189,146 +189,63 @@ struct compute_size_and_repeat_separately_fn { // If d_chars != nullptr: only repeat strings. char* d_chars{nullptr}; - __device__ int64_t operator()(size_type const idx) const noexcept + __device__ void operator()(size_type const idx) const noexcept { auto const string_is_valid = !strings_has_nulls || strings_dv.is_valid_nocheck(idx); auto const rtimes_is_valid = !rtimes_has_nulls || repeat_times_dv.is_valid_nocheck(idx); // Any null input (either string or repeat_times value) will result in a null output. auto const is_valid = string_is_valid && rtimes_is_valid; + if (!is_valid) { + if (!d_chars) { d_offsets[idx] = 0; } + return; + } - // When the input string is null, `repeat_times` and `string_size` are also set to 0. - // This makes sure that if `repeat_times > 0` then we will always have a valid input string, - // and if `repeat_times <= 0` we will never copy anything to the output. - auto const repeat_times = is_valid ? repeat_times_iter[idx] : size_type{0}; - auto const string_size = - is_valid ? strings_dv.element(idx).size_bytes() : size_type{0}; - - // The output_size is returned, and it needs to be an int64_t number to prevent overflow. - auto const output_size = - repeat_times > 0 ? 
static_cast(repeat_times) * static_cast(string_size) - : int64_t{0}; + auto repeat_times = repeat_times_iter[idx]; + auto const d_str = strings_dv.element(idx); if (!d_chars) { - // If overflow happen, the stored value of output string size will be incorrect due to - // downcasting. In such cases, the entire output string size array should be discarded. - d_offsets[idx] = static_cast(output_size); - } else if (repeat_times > 0 && string_size > 0) { - auto const d_str = strings_dv.element(idx); - auto const input_ptr = d_str.data(); - auto output_ptr = d_chars + d_offsets[idx]; - for (size_type repeat_idx = 0; repeat_idx < repeat_times; ++repeat_idx) { - output_ptr = copy_and_increment(output_ptr, input_ptr, string_size); + // repeat_times could be negative + d_offsets[idx] = (repeat_times > 0) ? (repeat_times * d_str.size_bytes()) : 0; + } else { + auto output_ptr = d_chars + d_offsets[idx]; + while (repeat_times-- > 0) { + output_ptr = copy_and_increment(output_ptr, d_str.data(), d_str.size_bytes()); } } - - // The output_size value may be used to sum up to detect overflow at the caller site. - // The caller can detect overflow easily by checking `SUM(output_size) > INT_MAX`. - return output_size; } }; -/** - * @brief Creates child offsets and chars columns by applying the template function that - * can be used for computing the output size of each string as well as create the output. - * - * This function is similar to `strings::detail::make_strings_children`, except that it accepts an - * optional input `std::optional` that can contain the precomputed sizes of the output - * strings. - * - * @deprecated This will be removed with issue 12542 - */ -template -auto make_strings_children(Func fn, - size_type exec_size, - size_type strings_count, - std::optional output_strings_sizes, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto offsets_column = make_numeric_column( - data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); - - auto offsets_view = offsets_column->mutable_view(); - auto d_offsets = offsets_view.template data(); - fn.d_offsets = d_offsets; - - // This may be called twice -- once for offsets and once for chars. - auto for_each_fn = [exec_size, stream](Func& fn) { - thrust::for_each_n( - rmm::exec_policy(stream), thrust::make_counting_iterator(0), exec_size, fn); - }; - - if (!output_strings_sizes.has_value()) { - // Compute the output sizes only if they are not given. - for_each_fn(fn); - - // Compute the offsets values. - auto const bytes = - cudf::detail::sizes_to_offsets(d_offsets, d_offsets + strings_count + 1, d_offsets, stream); - CUDF_EXPECTS(bytes <= static_cast(std::numeric_limits::max()), - "Size of output exceeds column size limit"); - } else { - // Compute the offsets values from the provided output string sizes. - auto const string_sizes = output_strings_sizes.value(); - CUDF_CUDA_TRY(cudaMemsetAsync(d_offsets, 0, sizeof(offset_type), stream.value())); - thrust::inclusive_scan(rmm::exec_policy(stream), - string_sizes.template begin(), - string_sizes.template end(), - d_offsets + 1); - } - - // Now build the chars column - auto const bytes = cudf::detail::get_value(offsets_view, strings_count, stream); - auto chars_column = create_chars_child_column(bytes, stream, mr); - - // Execute the function fn again to fill the chars column. - // Note that if the output chars column has zero size, the function fn should not be called to - // avoid accidentally overwriting the offsets. 
- if (bytes > 0) { - fn.d_chars = chars_column->mutable_view().template data(); - for_each_fn(fn); - } - - return std::pair(std::move(offsets_column), std::move(chars_column)); -} - } // namespace std::unique_ptr repeat_strings(strings_column_view const& input, column_view const& repeat_times, - std::optional output_strings_sizes, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(input.size() == repeat_times.size(), "The input columns must have the same size."); CUDF_EXPECTS(cudf::is_index_type(repeat_times.type()), "repeat_strings expects an integer type for the `repeat_times` input column."); - if (output_strings_sizes.has_value()) { - auto const output_sizes = output_strings_sizes.value(); - CUDF_EXPECTS(input.size() == output_sizes.size() && - (!output_sizes.nullable() || !output_sizes.has_nulls()), - "The given column of output string sizes is invalid."); - } auto const strings_count = input.size(); if (strings_count == 0) { return make_empty_column(type_id::STRING); } auto const strings_dv_ptr = column_device_view::create(input.parent(), stream); auto const repeat_times_dv_ptr = column_device_view::create(repeat_times, stream); - auto const strings_has_nulls = input.has_nulls(); - auto const rtimes_has_nulls = repeat_times.has_nulls(); auto const repeat_times_iter = cudf::detail::indexalator_factory::make_input_iterator(repeat_times); - auto const fn = compute_size_and_repeat_separately_fn{ - *strings_dv_ptr, *repeat_times_dv_ptr, repeat_times_iter, strings_has_nulls, rtimes_has_nulls}; - - auto [offsets_column, chars_column] = - make_strings_children(fn, strings_count, strings_count, output_strings_sizes, stream, mr); - - // We generate new bitmask by AND of the input columns' bitmasks. - // Note that if the input columns are nullable, the output column will also be nullable (which may - // not have nulls). + auto const fn = + compute_sizes_and_repeat_fn{*strings_dv_ptr, + *repeat_times_dv_ptr, + repeat_times_iter, + input.has_nulls(), + repeat_times.has_nulls()}; + + auto [offsets_column, chars_column] = make_strings_children(fn, strings_count, stream, mr); + + // We generate new bitmask by AND of the two input columns' bitmasks. + // Note that if either of the input columns are nullable, the output column will also be nullable + // but may not have nulls. 
auto [null_mask, null_count] = cudf::detail::bitmask_and(table_view{{input.parent(), repeat_times}}, stream, mr); @@ -338,52 +255,6 @@ std::unique_ptr repeat_strings(strings_column_view const& input, null_count, std::move(null_mask)); } - -std::pair, int64_t> repeat_strings_output_sizes( - strings_column_view const& input, - column_view const& repeat_times, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(input.size() == repeat_times.size(), "The input columns must have the same size."); - CUDF_EXPECTS( - cudf::is_index_type(repeat_times.type()), - "repeat_strings_output_sizes expects an integer type for the `repeat_times` input column."); - - auto const strings_count = input.size(); - if (strings_count == 0) { - return std::pair(make_empty_column(type_to_id()), int64_t{0}); - } - - auto output_sizes = make_numeric_column( - data_type{type_to_id()}, strings_count, mask_state::UNALLOCATED, stream, mr); - - auto const strings_dv_ptr = column_device_view::create(input.parent(), stream); - auto const repeat_times_dv_ptr = column_device_view::create(repeat_times, stream); - auto const strings_has_nulls = input.has_nulls(); - auto const rtimes_has_nulls = repeat_times.has_nulls(); - auto const repeat_times_iter = - cudf::detail::indexalator_factory::make_input_iterator(repeat_times); - - auto const fn = compute_size_and_repeat_separately_fn{ - *strings_dv_ptr, - *repeat_times_dv_ptr, - repeat_times_iter, - strings_has_nulls, - rtimes_has_nulls, - output_sizes->mutable_view().template begin()}; - - auto const total_bytes = - thrust::transform_reduce(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - fn, - int64_t{0}, - thrust::plus{}); - - return std::pair(std::move(output_sizes), total_bytes); -} - } // namespace detail std::unique_ptr repeat_string(string_scalar const& input, @@ -404,21 +275,10 @@ std::unique_ptr repeat_strings(strings_column_view const& input, std::unique_ptr repeat_strings(strings_column_view const& input, column_view const& repeat_times, - std::optional output_strings_sizes, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat_strings( - input, repeat_times, output_strings_sizes, cudf::get_default_stream(), mr); -} - -std::pair, int64_t> repeat_strings_output_sizes( - strings_column_view const& input, - column_view const& repeat_times, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::repeat_strings_output_sizes(input, repeat_times, cudf::get_default_stream(), mr); + return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr); } } // namespace strings diff --git a/cpp/tests/strings/repeat_strings_tests.cpp b/cpp/tests/strings/repeat_strings_tests.cpp index 69d0494c253..e75409d9f39 100644 --- a/cpp/tests/strings/repeat_strings_tests.cpp +++ b/cpp/tests/strings/repeat_strings_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -207,20 +207,6 @@ TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesInvalidInput) EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times), cudf::logic_error); } - // Sizes mismatched between strings column and output_strings_sizes column. 
- { - auto const repeat_times = int32s_col{1, 2}; - auto const sizes = int32s_col{1, 2, 3, 4, 5}; - EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times, sizes), cudf::logic_error); - } - - // output_strings_sizes column has nulls. - { - auto const repeat_times = int32s_col{1, 2}; - auto const sizes = int32s_col{{null, 2}, null_at(0)}; - EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times, sizes), cudf::logic_error); - } - // Invalid data type for repeat_times column. { auto const repeat_times = cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6}; @@ -243,11 +229,7 @@ TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesOverflowOutput) auto const repeat_times = int32s_col{half_max, half_max, half_max, half_max, half_max, half_max, half_max}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - (void)sizes; - auto const expected_bytes = static_cast(half_max) * int64_t{1 + 2 + 3 + 4 + 5 + 6 + 7}; - EXPECT_EQ(expected_bytes, total_bytes); + EXPECT_THROW(cudf::strings::repeat_strings(strs_cv, repeat_times), cudf::logic_error); } TYPED_TEST(RepeatStringsTypedTest, StringsColumnNoNullWithScalarRepeatTimes) @@ -301,15 +283,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnNoNullWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 12, 27, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(45, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // repeat_times column has nulls. @@ -320,15 +293,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnNoNullWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 27, 12, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(45, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } } @@ -377,15 +341,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnNoNullWithColumnRepeatTime auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 12, 27}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(45, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the middle of the column. 
@@ -397,15 +352,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnNoNullWithColumnRepeatTime auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{12, 27}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(39, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the second half of the column. @@ -417,15 +363,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnNoNullWithColumnRepeatTime auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{27, 12, 12}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(51, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } } @@ -520,15 +457,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnWithNullsWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 18, 0, 0, 0, 12, 12, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(48, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // repeat_times column has nulls. @@ -549,15 +477,6 @@ TYPED_TEST(RepeatStringsTypedTest, StringsColumnWithNullsWithColumnRepeatTimes) auto results = cudf::strings::repeat_strings(strs_cv, repeat_times); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 0, 0, 0, 0, 12, 0, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(strs_cv, repeat_times); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(18, total_bytes); - - results = cudf::strings::repeat_strings(strs_cv, repeat_times, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } } @@ -631,15 +550,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{6, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(6, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the middle of the column. 
@@ -652,15 +562,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{0, 0, 0, 0, 12}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(12, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the second half of the column, output has nulls. @@ -672,15 +573,6 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{12, 0, 0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(12, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_strs, *results, verbosity); } // Sliced the second half of the column, output does not have null. @@ -693,14 +585,5 @@ TYPED_TEST(RepeatStringsTypedTest, SlicedStringsColumnWithNullsWithColumnRepeatT auto results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_strs, *results, verbosity); - - auto const expected_sizes = int32s_col{0, 0}; - auto const [sizes, total_bytes] = - cudf::strings::repeat_strings_output_sizes(sliced_strs_cv, sliced_rtimes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_sizes, *sizes, verbosity); - EXPECT_EQ(0, total_bytes); - - results = cudf::strings::repeat_strings(sliced_strs_cv, sliced_rtimes, *sizes); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_strs, *results, verbosity); } } From 476d5bbf9cfdbcef024bdccc29f30cd1c6fdbc94 Mon Sep 17 00:00:00 2001 From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> Date: Wed, 8 Feb 2023 10:02:08 -0600 Subject: [PATCH 05/10] Handle parquet list data corner case (#12698) Fixes an issue with a particular arrangement of page data related to lists. Specifically, it is possible for page `N` to contain "0" rows because the values for the row it is a part of start on page `N-1` and end on page `N+1`. This was defeating logic in the decode kernel that would erroneously cause these values to be skipped. Similar to https://github.com/rapidsai/cudf/pull/12488 this is only reproducible with data out in the wild. In this case, we have a file that we could in theory check in to create a test with, but it is 16 MB so it's fairly large. Looking for feedback on whether this is too big. 
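For illustration, a simplified host-side sketch of the two interval checks the decode kernel now combines; `page_span` is a hypothetical stand-in for the fields read from `page_state_s` and is not part of the patch:

```cpp
#include <cstddef>

struct page_span {
  std::size_t begin;  // col.start_row + page.chunk_row
  std::size_t end;    // begin + page.num_rows
};

// Page touches the start or the end of the requested [start_row, start_row + num_rows) range
bool is_bounds_page(page_span p, std::size_t start_row, std::size_t num_rows)
{
  auto const end = start_row + num_rows;
  return (p.begin <= start_row && p.end >= start_row) || (p.begin <= end && p.end >= end);
}

// Page lies entirely inside the requested range (the "0 rows" corner case above)
bool is_page_contained(page_span p, std::size_t start_row, std::size_t num_rows)
{
  return p.begin >= start_row && p.end <= start_row + num_rows;
}
```

A page is decoded when either check passes, so a zero-row page that sits wholly inside the requested row range is no longer skipped.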
Authors: - https://github.com/nvdbaranec - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Bradley Dice (https://github.com/bdice) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/12698 --- cpp/src/io/parquet/page_data.cu | 50 +++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 23d130e1585..ee115e7432a 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -104,20 +104,41 @@ struct page_state_s { * specified row bounds * * @param s The page to be checked - * @param min_row The starting row index + * @param start_row The starting row index * @param num_rows The number of rows * * @return True if the page spans the beginning or the end of the row bounds */ -inline __device__ bool is_bounds_page(page_state_s* const s, size_t min_row, size_t num_rows) +inline __device__ bool is_bounds_page(page_state_s* const s, size_t start_row, size_t num_rows) { size_t const page_begin = s->col.start_row + s->page.chunk_row; size_t const page_end = page_begin + s->page.num_rows; - size_t const begin = min_row; - size_t const end = min_row + num_rows; + size_t const begin = start_row; + size_t const end = start_row + num_rows; + return ((page_begin <= begin && page_end >= begin) || (page_begin <= end && page_end >= end)); } +/** + * @brief Returns whether or not a page is completely contained within the specified + * row bounds + * + * @param s The page to be checked + * @param start_row The starting row index + * @param num_rows The number of rows + * + * @return True if the page is completely contained within the row bounds + */ +inline __device__ bool is_page_contained(page_state_s* const s, size_t start_row, size_t num_rows) +{ + size_t const page_begin = s->col.start_row + s->page.chunk_row; + size_t const page_end = page_begin + s->page.num_rows; + size_t const begin = start_row; + size_t const end = start_row + num_rows; + + return page_begin >= begin && page_end <= end; +} + /** * @brief Read a 32-bit varint integer * @@ -1728,10 +1749,11 @@ __global__ void __launch_bounds__(block_size) auto const thread_depth = depth + t; if (thread_depth < s->page.num_output_nesting_levels) { // if we are not a bounding page (as checked above) then we are either - // returning 0 rows from the page (completely outside the bounds) or all - // rows in the page (completely within the bounds) + // returning all rows/values from this page, or 0 of them pp->nesting[thread_depth].batch_size = - s->num_rows == 0 ? 0 : pp->nesting[thread_depth].size; + (s->num_rows == 0 && !is_page_contained(s, min_row, num_rows)) + ? 0 + : pp->nesting[thread_depth].size; } depth += blockDim.x; } @@ -1838,7 +1860,19 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; // if we have no work to do (eg, in a skip_rows/num_rows case) in this page. 
- if (s->num_rows == 0 && !(has_repetition && is_bounds_page(s, min_row, num_rows))) { return; } + // + // corner case: in the case of lists, we can have pages that contain "0" rows if the current row + // starts before this page and ends after this page: + // P0 P1 P2 + // |---------|---------|----------| + // ^------------------^ + // row start row end + // P1 will contain 0 rows + // + if (s->num_rows == 0 && !(has_repetition && (is_bounds_page(s, min_row, num_rows) || + is_page_contained(s, min_row, num_rows)))) { + return; + } if (s->dict_base) { out_thread0 = (s->dict_bits > 0) ? 64 : 32; From 89ec635dceacde2b6715af253029ef317905df4e Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Wed, 8 Feb 2023 12:17:50 -0500 Subject: [PATCH 06/10] Update shared workflow branches (#12733) This PR updates the branch reference used for our shared workflows. Authors: - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/12733 --- .github/workflows/build.yaml | 14 +++++++------- .github/workflows/pr.yaml | 26 +++++++++++++------------- .github/workflows/test.yaml | 14 +++++++------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 3366554db30..26d07515f70 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: skip_upload_pkgs: libcudf-example wheel-build-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -67,7 +67,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -77,7 +77,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.04 with: build_type: ${{ inputs.build_type 
|| 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-publish.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-publish.yml@branch-23.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index cf20b0006a2..f33fc15c52f 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,32 +25,32 @@ jobs: - wheel-build-dask-cudf - wheel-tests-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.04 checks: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04 conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04 with: build_type: pull-request conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04 with: build_type: pull-request conda-python-cudf-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 with: build_type: pull-request test_script: "ci/test_python_cudf.sh" @@ -58,14 +58,14 @@ jobs: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 with: build_type: pull-request test_script: "ci/test_python_other.sh" conda-java-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 with: build_type: pull-request node_type: "gpu-latest-1" @@ -75,7 +75,7 @@ jobs: conda-notebook-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 with: build_type: pull-request node_type: "gpu-latest-1" @@ -85,7 +85,7 @@ jobs: wheel-build-cudf: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 with: build_type: pull-request package-name: cudf @@ -94,7 +94,7 
@@ jobs: wheel-tests-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 with: build_type: pull-request package-name: cudf @@ -106,7 +106,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-tests-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.04 with: build_type: pull-request package-name: dask_cudf @@ -115,7 +115,7 @@ jobs: wheel-tests-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04 with: build_type: pull-request package-name: dask_cudf diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1b117bb2f4f..ff19d51f8ef 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -43,7 +43,7 @@ jobs: test_script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -67,7 +67,7 @@ jobs: run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -78,7 +78,7 @@ jobs: test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.02 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04 with: build_type: nightly branch: ${{ inputs.branch }} 
From d3f9dafa49c973c5e5d8b8a9336bbc92555ea0c3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 8 Feb 2023 11:41:10 -0600 Subject: [PATCH 07/10] Fix faulty conditional logic in JIT `GroupBy.apply` (#12706) Closes https://github.com/rapidsai/cudf/issues/12686 Authors: - https://github.com/brandon-b-miller Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/12706 --- python/cudf/cudf/tests/test_groupby.py | 17 +++++++++++++++++ python/cudf/udf_cpp/groupby/function.cu | 6 +++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index c5b330fd89c..1fea3c7a37e 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -519,6 +519,23 @@ def test_groupby_apply_jit_args(func, args, groupby_jit_data): run_groupby_apply_jit_test(groupby_jit_data, func, ["key1", "key2"], *args) +def test_groupby_apply_jit_block_divergence(): + # https://github.com/rapidsai/cudf/issues/12686 + df = cudf.DataFrame( + { + "a": [0, 0, 0, 1, 1, 1], + "b": [1, 1, 1, 2, 3, 4], + } + ) + + def diverging_block(grp_df): + if grp_df["a"].mean() > 0: + return grp_df["b"].mean() + return 0 + + run_groupby_apply_jit_test(df, diverging_block, ["a"]) + + @pytest.mark.parametrize("nelem", [2, 3, 100, 500, 1000]) @pytest.mark.parametrize( "func", diff --git a/python/cudf/udf_cpp/groupby/function.cu b/python/cudf/udf_cpp/groupby/function.cu index f94f99c4b49..782371b8a44 100644 --- a/python/cudf/udf_cpp/groupby/function.cu +++ b/python/cudf/udf_cpp/groupby/function.cu @@ -284,7 +284,7 @@ extern "C" { __device__ int name##_##cname(return_type* numba_return_value, type* const data, int64_t size) \ { \ return_type const res = name(data, size); \ - if (threadIdx.x == 0) { *numba_return_value = res; } \ + *numba_return_value = res; \ __syncthreads(); \ return 0; \ } @@ -309,8 +309,8 @@ extern "C" { __device__ int name##_##cname( \ int64_t* numba_return_value, type* const data, int64_t* index, int64_t size) \ { \ - auto const res = name(data, index, size); \ - if (threadIdx.x == 0) { *numba_return_value = res; } \ + auto const res = name(data, index, size); \ + *numba_return_value = res; \ __syncthreads(); \ return 0; \ } From 0161ba896a1d70ba3e049bdbb3d649cedba2aeb0 Mon Sep 17 00:00:00 2001 From: Cindy Jiang <47068112+cindyyuanjiang@users.noreply.github.com> Date: Wed, 8 Feb 2023 10:48:38 -0800 Subject: [PATCH 08/10] Add `regex_program` strings replacing java APIs and tests (#12701) This PR adds [replace_re, replace_with_backrefs](https://docs.rapids.ai/api/libcudf/nightly/replace__re_8hpp.html) related `regex_program` java APIs and unit tests. Part of work for https://github.com/NVIDIA/spark-rapids/issues/7295. 
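For context, the new Java overloads ultimately forward to the libcudf `regex_program` APIs through JNI. The sketch below is illustrative only: the helper function names are made up, the header paths are assumed from the public libcudf layout, and the pattern/replacement literals are borrowed from the unit tests in this patch; the `regex_program::create`, `replace_re`, and `replace_with_backrefs` calls themselves are the ones the updated JNI code invokes.

```cpp
// Hedged sketch of the libcudf usage the new ColumnView overloads wrap.
#include <cudf/column/column.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/regex/regex_program.hpp>
#include <cudf/strings/replace_re.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include <memory>

// Roughly what ColumnView.replaceRegex(new RegexProgram("[tT]itle", CaptureGroups.NON_CAPTURE), repl)
// forwards to on the C++ side.
std::unique_ptr<cudf::column> replace_titles(cudf::strings_column_view const& input)
{
  auto const prog = cudf::strings::regex_program::create(
    "[tT]itle", cudf::strings::regex_flags::DEFAULT, cudf::strings::capture_groups::NON_CAPTURE);
  auto const repl = cudf::string_scalar("Repl");
  return cudf::strings::replace_re(input, *prog, repl, -1);  // -1: replace all, the Java-side default
}

// Roughly what ColumnView.stringReplaceWithBackrefs(new RegexProgram("-([0-9])-"), "-0\\1-")
// forwards to; "\\1" refers to the first capture group.
std::unique_ptr<cudf::column> zero_pad_month(cudf::strings_column_view const& input)
{
  auto const prog = cudf::strings::regex_program::create("-([0-9])-");
  return cudf::strings::replace_with_backrefs(input, *prog, "-0\\1-");
}
```

The `regex_program` form matches the direction libcudf is taking for its regex APIs, which is why the raw-pattern Java variants are marked deprecated in this patch.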
Authors: - Cindy Jiang (https://github.com/cindyyuanjiang) Approvers: - Jason Lowe (https://github.com/jlowe) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12701 --- .../main/java/ai/rapids/cudf/ColumnView.java | 71 +++++++++++--- java/src/main/native/src/ColumnViewJni.cpp | 43 ++++---- .../java/ai/rapids/cudf/ColumnVectorTest.java | 98 ++++++++++++------- 3 files changed, 149 insertions(+), 63 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 2d0bf28225f..0cb9ed37d9f 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2922,8 +2922,21 @@ public final ColumnVector stringReplace(Scalar target, Scalar replace) { * @param repl The string scalar to replace for each pattern match. * @return A new column vector containing the string results. */ + @Deprecated public final ColumnVector replaceRegex(String pattern, Scalar repl) { - return replaceRegex(pattern, repl, -1); + return replaceRegex(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), repl); + } + + /** + * For each string, replaces any character sequence matching the given regex program pattern + * using the replacement string scalar. + * + * @param regexProg The regex program with pattern to search within each string. + * @param repl The string scalar to replace for each pattern match. + * @return A new column vector containing the string results. + */ + public final ColumnVector replaceRegex(RegexProgram regexProg, Scalar repl) { + return replaceRegex(regexProg, repl, -1); } /** @@ -2935,12 +2948,27 @@ public final ColumnVector replaceRegex(String pattern, Scalar repl) { * @param maxRepl The maximum number of times a replacement should occur within each string. * @return A new column vector containing the string results. */ + @Deprecated public final ColumnVector replaceRegex(String pattern, Scalar repl, int maxRepl) { + return replaceRegex(new RegexProgram(pattern, CaptureGroups.NON_CAPTURE), repl, maxRepl); + } + + /** + * For each string, replaces any character sequence matching the given regex program pattern + * using the replacement string scalar. + * + * @param regexProg The regex program with pattern to search within each string. + * @param repl The string scalar to replace for each pattern match. + * @param maxRepl The maximum number of times a replacement should occur within each string. + * @return A new column vector containing the string results. + */ + public final ColumnVector replaceRegex(RegexProgram regexProg, Scalar repl, int maxRepl) { if (!repl.getType().equals(DType.STRING)) { throw new IllegalArgumentException("Replacement must be a string scalar"); } - return new ColumnVector(replaceRegex(getNativeView(), pattern, repl.getScalarHandle(), - maxRepl)); + assert regexProg != null : "regex program may not be null"; + return new ColumnVector(replaceRegex(getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, repl.getScalarHandle(), maxRepl)); } /** @@ -2966,9 +2994,26 @@ public final ColumnVector replaceMultiRegex(String[] patterns, ColumnView repls) * @param replace The replacement template for creating the output string. * @return A new java column vector containing the string results. 
*/ + @Deprecated public final ColumnVector stringReplaceWithBackrefs(String pattern, String replace) { - return new ColumnVector(stringReplaceWithBackrefs(getNativeView(), pattern, - replace)); + return stringReplaceWithBackrefs(new RegexProgram(pattern), replace); + } + + /** + * For each string, replaces any character sequence matching the given regex program + * pattern using the replace template for back-references. + * + * Any null string entries return corresponding null output column entries. + * + * @param regexProg The regex program with pattern to search within each string. + * @param replace The replacement template for creating the output string. + * @return A new java column vector containing the string results. + */ + public final ColumnVector stringReplaceWithBackrefs(RegexProgram regexProg, String replace) { + assert regexProg != null : "regex program may not be null"; + return new ColumnVector( + stringReplaceWithBackrefs(getNativeView(), regexProg.pattern(), regexProg.combinedFlags(), + regexProg.capture().nativeId, replace)); } /** @@ -4129,12 +4174,14 @@ private static native long substringColumn(long columnView, long startColumn, lo * Native method for replacing each regular expression pattern match with the specified * replacement string. * @param columnView native handle of the cudf::column_view being operated on. - * @param pattern The regular expression pattern to search within each string. + * @param pattern regular expression pattern to search within each string. + * @param flags regex flags setting. + * @param capture capture groups setting. * @param repl native handle of the cudf::scalar containing the replacement string. * @param maxRepl maximum number of times to replace the pattern within a string * @return native handle of the resulting cudf column containing the string results. */ - private static native long replaceRegex(long columnView, String pattern, + private static native long replaceRegex(long columnView, String pattern, int flags, int capture, long repl, long maxRepl) throws CudfException; /** @@ -4148,15 +4195,17 @@ private static native long replaceMultiRegex(long columnView, String[] patterns, long repls) throws CudfException; /** - * Native method for replacing any character sequence matching the given pattern - * using the replace template for back-references. + * Native method for replacing any character sequence matching the given regex program + * pattern using the replace template for back-references. * @param columnView native handle of the cudf::column_view being operated on. * @param pattern The regular expression patterns to search within each string. + * @param flags Regex flags setting. + * @param capture Capture groups setting. * @param replace The replacement template for creating the output string. * @return native handle of the resulting cudf column containing the string results. */ - private static native long stringReplaceWithBackrefs(long columnView, String pattern, - String replace) throws CudfException; + private static native long stringReplaceWithBackrefs(long columnView, String pattern, int flags, + int capture, String replace) throws CudfException; /** * Native method for checking if strings in a column starts with a specified comparison string. 
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 958efd364ed..c42cc430560 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1606,21 +1606,24 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapContains(JNIEnv *env, CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceRegex(JNIEnv *env, jclass, - jlong j_column_view, - jstring j_pattern, jlong j_repl, - jlong j_maxrepl) { +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceRegex( + JNIEnv *env, jclass, jlong j_column_view, jstring j_pattern, jint regex_flags, + jint capture_groups, jlong j_repl, jlong j_maxrepl) { JNI_NULL_CHECK(env, j_column_view, "column is null", 0); JNI_NULL_CHECK(env, j_pattern, "pattern string is null", 0); JNI_NULL_CHECK(env, j_repl, "replace scalar is null", 0); try { cudf::jni::auto_set_device(env); - auto cv = reinterpret_cast(j_column_view); - cudf::strings_column_view scv(*cv); - cudf::jni::native_jstring pattern(env, j_pattern); - auto repl = reinterpret_cast(j_repl); - return release_as_jlong(cudf::strings::replace_re(scv, pattern.get(), *repl, j_maxrepl)); + auto const cv = reinterpret_cast(j_column_view); + auto const strings_column = cudf::strings_column_view{*cv}; + auto const pattern = cudf::jni::native_jstring(env, j_pattern); + auto const flags = static_cast(regex_flags); + auto const groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern.get(), flags, groups); + auto const repl = reinterpret_cast(j_repl); + return release_as_jlong( + cudf::strings::replace_re(strings_column, *regex_prog, *repl, j_maxrepl)); } CATCH_STD(env, 0); } @@ -1646,19 +1649,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceMultiRegex(JNIEnv } JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringReplaceWithBackrefs( - JNIEnv *env, jclass, jlong column_view, jstring patternObj, jstring replaceObj) { + JNIEnv *env, jclass, jlong j_column_view, jstring pattern_obj, jint regex_flags, + jint capture_groups, jstring replace_obj) { - JNI_NULL_CHECK(env, column_view, "column is null", 0); - JNI_NULL_CHECK(env, patternObj, "pattern string is null", 0); - JNI_NULL_CHECK(env, replaceObj, "replace string is null", 0); + JNI_NULL_CHECK(env, j_column_view, "column is null", 0); + JNI_NULL_CHECK(env, pattern_obj, "pattern string is null", 0); + JNI_NULL_CHECK(env, replace_obj, "replace string is null", 0); try { cudf::jni::auto_set_device(env); - cudf::column_view *cv = reinterpret_cast(column_view); - cudf::strings_column_view scv(*cv); - cudf::jni::native_jstring ss_pattern(env, patternObj); - cudf::jni::native_jstring ss_replace(env, replaceObj); + auto const cv = reinterpret_cast(j_column_view); + auto const strings_column = cudf::strings_column_view{*cv}; + auto const pattern = cudf::jni::native_jstring(env, pattern_obj); + auto const flags = static_cast(regex_flags); + auto const groups = static_cast(capture_groups); + auto const regex_prog = cudf::strings::regex_program::create(pattern.get(), flags, groups); + cudf::jni::native_jstring ss_replace(env, replace_obj); return release_as_jlong( - cudf::strings::replace_with_backrefs(scv, ss_pattern.get(), ss_replace.get())); + cudf::strings::replace_with_backrefs(strings_column, *regex_prog, ss_replace.get())); } CATCH_STD(env, 0); } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java 
b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index ab4baf74277..db64dcb08c7 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -5147,29 +5147,42 @@ void teststringReplaceThrowsException() { @Test void testReplaceRegex() { - try (ColumnVector v = - ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); - Scalar repl = Scalar.fromString("Repl"); - ColumnVector actual = v.replaceRegex("[tT]itle", repl); - ColumnVector expected = - ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { - assertColumnsAreEqual(expected, actual); - } + try (ColumnVector v = ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); + Scalar repl = Scalar.fromString("Repl")) { + String pattern = "[tT]itle"; + RegexProgram regexProg = new RegexProgram(pattern, CaptureGroups.NON_CAPTURE); - try (ColumnVector v = - ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); - Scalar repl = Scalar.fromString("Repl"); - ColumnVector actual = v.replaceRegex("[tT]itle", repl, 0)) { - assertColumnsAreEqual(v, actual); - } + try (ColumnVector actual = v.replaceRegex(pattern, repl); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } - try (ColumnVector v = - ColumnVector.fromStrings("title and Title with title", "nothing", null, "Title"); - Scalar repl = Scalar.fromString("Repl"); - ColumnVector actual = v.replaceRegex("[tT]itle", repl, 1); - ColumnVector expected = - ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { - assertColumnsAreEqual(expected, actual); + try (ColumnVector actual = v.replaceRegex(pattern, repl, 0)) { + assertColumnsAreEqual(v, actual); + } + + try (ColumnVector actual = v.replaceRegex(pattern, repl, 1); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } + + try (ColumnVector actual = v.replaceRegex(regexProg, repl); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Repl with Repl", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } + + try (ColumnVector actual = v.replaceRegex(regexProg, repl, 0)) { + assertColumnsAreEqual(v, actual); + } + + try (ColumnVector actual = v.replaceRegex(regexProg, repl, 1); + ColumnVector expected = + ColumnVector.fromStrings("Repl and Title with title", "nothing", null, "Repl")) { + assertColumnsAreEqual(expected, actual); + } } } @@ -5188,45 +5201,55 @@ void testReplaceMultiRegex() { @Test void testStringReplaceWithBackrefs() { - try (ColumnVector v = ColumnVector.fromStrings("
<h1>title</h1>", "<h1>another title</h1>", - null); + try (ColumnVector v = ColumnVector.fromStrings("<h1>title</h1>", "<h1>another title</h1>", null); ColumnVector expected = ColumnVector.fromStrings("<h2>title</h2>", "<h2>another title</h2>", null); - ColumnVector actual = v.stringReplaceWithBackrefs("<h1>(.*)</h1>", "<h2>\\1</h2>")) { + ColumnVector actual = v.stringReplaceWithBackrefs("<h1>(.*)</h1>", "<h2>\\1</h2>"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("<h1>(.*)</h1>"), "<h2>\\1</h2>
")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } try (ColumnVector v = ColumnVector.fromStrings("2020-1-01", "2020-2-02", null); ColumnVector expected = ColumnVector.fromStrings("2020-01-01", "2020-02-02", null); - ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])-", "-0\\1-")) { + ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])-", "-0\\1-"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("-([0-9])-"), "-0\\1-")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } - try (ColumnVector v = ColumnVector.fromStrings("2020-01-1", "2020-02-2", - "2020-03-3invalid", null); + try (ColumnVector v = ColumnVector.fromStrings("2020-01-1", "2020-02-2", "2020-03-3invalid", null); ColumnVector expected = ColumnVector.fromStrings("2020-01-01", "2020-02-02", "2020-03-3invalid", null); - ColumnVector actual = v.stringReplaceWithBackrefs( - "-([0-9])$", "-0\\1")) { + ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])$", "-0\\1"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("-([0-9])$"), "-0\\1")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } try (ColumnVector v = ColumnVector.fromStrings("2020-01-1 random_text", "2020-02-2T12:34:56", - "2020-03-3invalid", null); + "2020-03-3invalid", null); ColumnVector expected = ColumnVector.fromStrings("2020-01-01 random_text", "2020-02-02T12:34:56", "2020-03-3invalid", null); - ColumnVector actual = v.stringReplaceWithBackrefs( - "-([0-9])([ T])", "-0\\1\\2")) { + ColumnVector actual = v.stringReplaceWithBackrefs("-([0-9])([ T])", "-0\\1\\2"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("-([0-9])([ T])"), "-0\\1\\2")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } // test zero as group index try (ColumnVector v = ColumnVector.fromStrings("aa-11 b2b-345", "aa-11a 1c-2b2 b2-c3", "11-aa", null); ColumnVector expected = ColumnVector.fromStrings("aa-11:aa:11; b2b-345:b:345;", "aa-11:aa:11;a 1c-2:c:2;b2 b2-c3", "11-aa", null); - ColumnVector actual = v.stringReplaceWithBackrefs( - "([a-z]+)-([0-9]+)", "${0}:${1}:${2};")) { + ColumnVector actual = v.stringReplaceWithBackrefs("([a-z]+)-([0-9]+)", "${0}:${1}:${2};"); + ColumnVector actualRe = + v.stringReplaceWithBackrefs(new RegexProgram("([a-z]+)-([0-9]+)"), "${0}:${1}:${2};")) { assertColumnsAreEqual(expected, actual); + assertColumnsAreEqual(expected, actualRe); } // group index exceeds group count @@ -5236,6 +5259,13 @@ void testStringReplaceWithBackrefs() { } }); + // group index exceeds group count + assertThrows(CudfException.class, () -> { + try (ColumnVector v = ColumnVector.fromStrings("ABC123defgh"); + ColumnVector r = + v.stringReplaceWithBackrefs(new RegexProgram("([A-Z]+)([0-9]+)([a-z]+)"), "\\4")) { + } + }); } @Test From c20c8b42215e38bee207b49dad6e28ea04ccbd8c Mon Sep 17 00:00:00 2001 From: Sevag H Date: Wed, 8 Feb 2023 16:50:48 -0500 Subject: [PATCH 09/10] Bump pinned rapids wheel deps to 23.4 (#12735) We introduced a change to pin RAPIDS wheel dependencies to the same release version. However, branch 23.04 was created before that last PR was merged, so as of now cudf's 23.4 wheels are installing 23.2 RAPIDS dependencies. This PR updates those pins to the current release. 
Authors: - Sevag H (https://github.com/sevagh) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/12735 --- python/cudf/setup.py | 2 +- python/dask_cudf/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 48199d83478..88bc2cfae28 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -21,7 +21,7 @@ "typing_extensions", # Allow floating minor versions for Arrow. "pyarrow==10", - f"rmm{cuda_suffix}==23.2.*", + f"rmm{cuda_suffix}==23.4.*", f"ptxcompiler{cuda_suffix}", f"cubinlinker{cuda_suffix}", "cupy-cuda11x", diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 4b420b1b97c..be4c704019d 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -13,7 +13,7 @@ "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.6.0dev0", - f"cudf{cuda_suffix}==23.2.*", + f"cudf{cuda_suffix}==23.4.*", "cupy-cuda11x", ] From 3e4ff2afb8a5ca7f1ce051f2d86945688dfb21f9 Mon Sep 17 00:00:00 2001 From: Ajay Thorve Date: Wed, 8 Feb 2023 16:25:28 -0800 Subject: [PATCH 10/10] Reduce error handling verbosity in CI tests scripts (#12738) This PR adds a less verbose [trap method](https://github.com/rapidsai/cugraph/blob/f2b081075704aabc789603e14ce552eac3fbe692/ci/test.sh#L19), for error handling to help ensure that we capture all potential error codes in our test scripts, and works as follows: - setting an environment variable, EXITCODE, with a default value of 0 - setting a trap statement triggered by ERR signals which will set EXITCODE=1 when any commands return a non-zero exit code cc @ajschmidt8 Authors: - Ajay Thorve (https://github.com/AjayThorve) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12738 --- ci/test_cpp.sh | 12 ++++-------- ci/test_java.sh | 13 ++++--------- ci/test_notebooks.sh | 9 ++++----- ci/test_python_common.sh | 3 +-- ci/test_python_cudf.sh | 25 +++++-------------------- ci/test_python_other.sh | 31 +++++-------------------------- 6 files changed, 23 insertions(+), 70 deletions(-) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 5b1e8aa398c..b3d7919b279 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -21,7 +21,6 @@ set -u CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/ mkdir -p "${RAPIDS_TESTS_DIR}" -SUITEERROR=0 rapids-print-env @@ -32,6 +31,8 @@ rapids-mamba-retry install \ rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e # TODO: Disabling stream identification for now. @@ -61,12 +62,6 @@ for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do #else # GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} #fi - - exitcode=$? 
- if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: GTest ${gt}" - fi done if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then @@ -85,4 +80,5 @@ if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then # TODO: test-results/*.cs.log are processed in gpuci fi -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_java.sh b/ci/test_java.sh index 27a1f2aa46f..f905aaa1178 100755 --- a/ci/test_java.sh +++ b/ci/test_java.sh @@ -29,22 +29,17 @@ rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ libcudf -SUITEERROR=0 - rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e rapids-logger "Run Java tests" pushd java mvn test -B -DCUDF_JNI_ARROW_STATIC=OFF -DCUDF_JNI_ENABLE_PROFILING=OFF -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf Java" -fi popd -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh index f1e17162195..7f5f35219b0 100755 --- a/ci/test_notebooks.sh +++ b/ci/test_notebooks.sh @@ -36,9 +36,8 @@ pushd notebooks # (space-separated list of filenames without paths) SKIPNBS="" -# Set SUITEERROR to failure if any run fails -SUITEERROR=0 - +EXITCODE=0 +trap "EXITCODE=1" ERR set +e for nb in $(find . -name "*.ipynb"); do nbBasename=$(basename ${nb}) @@ -55,8 +54,8 @@ for nb in $(find . -name "*.ipynb"); do else nvidia-smi ${NBTEST} ${nbBasename} - SUITEERROR=$((SUITEERROR | $?)) fi done -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh index 107540c0192..0e922c105dd 100755 --- a/ci/test_python_common.sh +++ b/ci/test_python_common.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Common setup steps shared by Python test jobs @@ -27,7 +27,6 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" -SUITEERROR=0 rapids-print-env diff --git a/ci/test_python_cudf.sh b/ci/test_python_cudf.sh index bea162a9318..337ef38cf97 100755 --- a/ci/test_python_cudf.sh +++ b/ci/test_python_cudf.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Common setup steps shared by Python test jobs source "$(dirname "$0")/test_python_common.sh" @@ -7,6 +7,8 @@ source "$(dirname "$0")/test_python_common.sh" rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e rapids-logger "pytest cudf" @@ -24,12 +26,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-coverage.xml" \ --cov-report=term \ tests -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf" -fi popd # Run benchmarks with both cudf and pandas to ensure compatibility is maintained. @@ -48,12 +44,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-benchmark-coverage.xml" \ --cov-report=term \ benchmarks -exitcode=$? 
- -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf" -fi rapids-logger "pytest for cudf benchmarks using pandas" CUDF_BENCHMARKS_USE_PANDAS=ON \ @@ -67,12 +57,7 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-benchmark-pandas-coverage.xml" \ --cov-report=term \ benchmarks -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf" -fi popd -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh index d7a5e288193..b79cd44cdbe 100755 --- a/ci/test_python_other.sh +++ b/ci/test_python_other.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Common setup steps shared by Python test jobs source "$(dirname "$0")/test_python_common.sh" @@ -12,6 +12,8 @@ rapids-mamba-retry install \ rapids-logger "Check GPU usage" nvidia-smi +EXITCODE=0 +trap "EXITCODE=1" ERR set +e rapids-logger "pytest dask_cudf" @@ -26,12 +28,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \ --cov-report=term \ dask_cudf -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in dask-cudf" -fi popd rapids-logger "pytest custreamz" @@ -46,12 +42,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \ --cov-report=term \ custreamz -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in custreamz" -fi popd set -e @@ -73,12 +63,6 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/strings-udf-coverage.xml" \ --cov-report=term \ tests -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in strings_udf" -fi popd rapids-logger "pytest cudf with strings_udf" @@ -94,12 +78,7 @@ pytest \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-strings-udf-coverage.xml" \ --cov-report=term \ tests/test_udf_masked_ops.py -exitcode=$? - -if (( ${exitcode} != 0 )); then - SUITEERROR=${exitcode} - echo "FAILED: 1 or more tests in cudf with strings_udf" -fi popd -exit ${SUITEERROR} +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE}