From d441f51ad0d6860e0d3c25837d8ecc55076a1be5 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 22 Feb 2023 09:46:25 -0500 Subject: [PATCH 01/60] Enable groupby std and variance aggregation types in libcudf Debug build (#12799) Re-enable groupby with `std` and `var` aggregations that were disabled for Debug builds due to a runtime issue. Retesting with nvcc 11.5, the error is no longer present so the code and the gtests have been re-enabled. Found while working on PR #12784 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Mike Wilson (https://github.com/hyperbolic2346) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/12799 --- cpp/src/groupby/sort/group_std.cu | 8 +------- cpp/tests/groupby/std_tests.cpp | 28 ---------------------------- cpp/tests/groupby/var_tests.cpp | 28 ---------------------------- 3 files changed, 1 insertion(+), 63 deletions(-) diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index 87fd9f7e843..a3efc1f172a 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -99,9 +99,6 @@ struct var_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { -// Running this in debug build causes a runtime error: -// `reduce_by_key failed on 2nd step: invalid device function` -#if !defined(__CUDACC_DEBUG__) using ResultType = cudf::detail::target_type_t; std::unique_ptr result = make_numeric_column(data_type(type_to_id()), @@ -141,9 +138,6 @@ struct var_functor { }); return result; -#else - CUDF_FAIL("Groupby std/var supported in debug build"); -#endif } template diff --git a/cpp/tests/groupby/std_tests.cpp b/cpp/tests/groupby/std_tests.cpp index fa3afeb30f8..56ddce1554f 100644 --- a/cpp/tests/groupby/std_tests.cpp +++ b/cpp/tests/groupby/std_tests.cpp @@ -33,11 +33,7 @@ using supported_types = cudf::test::Types Date: Wed, 22 Feb 2023 09:42:46 -0800 Subject: [PATCH 02/60] Variable fragment sizes for Parquet writer (#12685) Fixes #12613 This PR adds the ability for columns to have different fragment sizes. This allows a large fragment size for narrow columns, but allows for finer grained fragments for very wide columns. This change should make wide columns fit (approximately) within page size constraints, and should help with compressors that rely on pages being under a certain threshold (i.e. Zstandard). 
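As a usage note: the per-column fragment size calculation only happens when `max_page_fragment_size` is left unset, so callers can still opt out and pin a fixed fragment size through the writer options. A minimal sketch of that (the output path and wrapper function are placeholders; the builder calls themselves are the ones exercised in this change):

```cpp
#include <cudf/io/parquet.hpp>
#include <cudf/table/table_view.hpp>

// Sketch: explicitly setting max_page_fragment_size disables the new
// automatic per-column fragment sizing and restores fixed-size fragments.
void write_with_fixed_fragments(cudf::table_view const& table)
{
  auto opts =
    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{"out.parquet"}, table)
      .max_page_fragment_size(cudf::io::default_max_page_fragment_size)
      .build();
  cudf::io::write_parquet(opts);
}
```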
Authors: - Ed Seidl (https://github.com/etseidl) - Vukasin Milovanovic (https://github.com/vuule) - Mike Wilson (https://github.com/hyperbolic2346) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Mike Wilson (https://github.com/hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/12685 --- cpp/include/cudf/io/parquet.hpp | 4 +- cpp/src/io/parquet/page_enc.cu | 251 ++++++++++-------- cpp/src/io/parquet/parquet_gpu.hpp | 43 ++- cpp/src/io/parquet/writer_impl.cu | 265 ++++++++++++++----- cpp/src/io/parquet/writer_impl.hpp | 42 +-- cpp/tests/io/parquet_chunked_reader_test.cpp | 3 +- cpp/tests/io/parquet_test.cpp | 46 +++- 7 files changed, 449 insertions(+), 205 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index f4fb4d91f58..92b69deb671 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -494,7 +494,7 @@ class parquet_writer_options { // Maximum size of column chunk dictionary (in bytes) size_t _max_dictionary_size = default_max_dictionary_size; // Maximum number of rows in a page fragment - size_type _max_page_fragment_size = default_max_page_fragment_size; + std::optional _max_page_fragment_size; /** * @brief Constructor from sink and table. @@ -1076,7 +1076,7 @@ class chunked_parquet_writer_options { // Maximum size of column chunk dictionary (in bytes) size_t _max_dictionary_size = default_max_dictionary_size; // Maximum number of rows in a page fragment - size_type _max_page_fragment_size = default_max_page_fragment_size; + std::optional _max_page_fragment_size; /** * @brief Constructor from sink. diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 9f8f42702cd..5a12acec2a3 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -126,129 +126,164 @@ constexpr size_t underflow_safe_subtract(size_t a, size_t b) return a - b; } +void __device__ init_frag_state(frag_init_state_s* const s, + uint32_t fragment_size, + int part_end_row) +{ + // frag.num_rows = fragment_size except for the last fragment in partition which can be + // smaller. num_rows is fixed but fragment size could be larger if the data is strings or + // nested. + s->frag.num_rows = min(fragment_size, part_end_row - s->frag.start_row); + s->frag.num_dict_vals = 0; + s->frag.fragment_data_size = 0; + s->frag.dict_data_size = 0; + + s->frag.start_value_idx = row_to_value_idx(s->frag.start_row, s->col); + auto const end_value_idx = row_to_value_idx(s->frag.start_row + s->frag.num_rows, s->col); + s->frag.num_leaf_values = end_value_idx - s->frag.start_value_idx; + + if (s->col.level_offsets != nullptr) { + // For nested schemas, the number of values in a fragment is not directly related to the + // number of encoded data elements or the number of rows. It is simply the number of + // repetition/definition values which together encode validity and nesting information. 
+ auto const first_level_val_idx = s->col.level_offsets[s->frag.start_row]; + auto const last_level_val_idx = s->col.level_offsets[s->frag.start_row + s->frag.num_rows]; + s->frag.num_values = last_level_val_idx - first_level_val_idx; + } else { + s->frag.num_values = s->frag.num_rows; + } +} + +template +void __device__ calculate_frag_size(frag_init_state_s* const s, int t) +{ + using block_reduce = cub::BlockReduce; + __shared__ typename block_reduce::TempStorage reduce_storage; + + auto const physical_type = s->col.physical_type; + auto const leaf_type = s->col.leaf_column->type().id(); + auto const dtype_len = physical_type_len(physical_type, leaf_type); + auto const nvals = s->frag.num_leaf_values; + auto const start_value_idx = s->frag.start_value_idx; + + for (uint32_t i = 0; i < nvals; i += block_size) { + auto const val_idx = start_value_idx + i + t; + auto const is_valid = i + t < nvals && val_idx < s->col.leaf_column->size() && + s->col.leaf_column->is_valid(val_idx); + uint32_t len; + if (is_valid) { + len = dtype_len; + if (physical_type == BYTE_ARRAY) { + switch (leaf_type) { + case type_id::STRING: { + auto str = s->col.leaf_column->element(val_idx); + len += str.size_bytes(); + } break; + case type_id::LIST: { + auto list_element = + get_element(*s->col.leaf_column, val_idx); + len += list_element.size_bytes(); + } break; + default: CUDF_UNREACHABLE("Unsupported data type for leaf column"); + } + } + } else { + len = 0; + } + + len = block_reduce(reduce_storage).Sum(len); + if (t == 0) { s->frag.fragment_data_size += len; } + __syncthreads(); + // page fragment size must fit in a 32-bit signed integer + if (s->frag.fragment_data_size > std::numeric_limits::max()) { + CUDF_UNREACHABLE("page fragment size exceeds maximum for i32"); + } + } +} + } // anonymous namespace // blockDim {512,1,1} template __global__ void __launch_bounds__(block_size) - gpuInitPageFragments(device_2dspan frag, - device_span col_desc, - device_span partitions, - device_span part_frag_offset, - uint32_t fragment_size) + gpuInitRowGroupFragments(device_2dspan frag, + device_span col_desc, + device_span partitions, + device_span part_frag_offset, + uint32_t fragment_size) { __shared__ __align__(16) frag_init_state_s state_g; - using block_reduce = cub::BlockReduce; - __shared__ typename block_reduce::TempStorage reduce_storage; - frag_init_state_s* const s = &state_g; uint32_t const t = threadIdx.x; - auto const physical_type = col_desc[blockIdx.x].physical_type; uint32_t const num_fragments_per_column = frag.size().second; if (t == 0) { s->col = col_desc[blockIdx.x]; } __syncthreads(); - auto const leaf_type = s->col.leaf_column->type().id(); - auto const dtype_len = physical_type_len(physical_type, leaf_type); - for (uint32_t frag_y = blockIdx.y; frag_y < num_fragments_per_column; frag_y += gridDim.y) { if (t == 0) { // Find which partition this fragment came from auto it = thrust::upper_bound(thrust::seq, part_frag_offset.begin(), part_frag_offset.end(), frag_y); - int p = it - part_frag_offset.begin() - 1; - int part_end_row = partitions[p].start_row + partitions[p].num_rows; + int const p = it - part_frag_offset.begin() - 1; + int const part_end_row = partitions[p].start_row + partitions[p].num_rows; s->frag.start_row = (frag_y - part_frag_offset[p]) * fragment_size + partitions[p].start_row; - - // frag.num_rows = fragment_size except for the last fragment in partition which can be - // smaller. num_rows is fixed but fragment size could be larger if the data is strings or - // nested. 
- s->frag.num_rows = min(fragment_size, part_end_row - s->frag.start_row); - s->frag.num_dict_vals = 0; - s->frag.fragment_data_size = 0; - s->frag.dict_data_size = 0; - - s->frag.start_value_idx = row_to_value_idx(s->frag.start_row, s->col); - size_type end_value_idx = row_to_value_idx(s->frag.start_row + s->frag.num_rows, s->col); - s->frag.num_leaf_values = end_value_idx - s->frag.start_value_idx; - - if (s->col.level_offsets != nullptr) { - // For nested schemas, the number of values in a fragment is not directly related to the - // number of encoded data elements or the number of rows. It is simply the number of - // repetition/definition values which together encode validity and nesting information. - size_type first_level_val_idx = s->col.level_offsets[s->frag.start_row]; - size_type last_level_val_idx = s->col.level_offsets[s->frag.start_row + s->frag.num_rows]; - s->frag.num_values = last_level_val_idx - first_level_val_idx; - } else { - s->frag.num_values = s->frag.num_rows; - } + s->frag.chunk = frag[blockIdx.x][frag_y].chunk; + init_frag_state(s, fragment_size, part_end_row); } __syncthreads(); - size_type nvals = s->frag.num_leaf_values; - size_type start_value_idx = s->frag.start_value_idx; - - for (uint32_t i = 0; i < nvals; i += block_size) { - uint32_t val_idx = start_value_idx + i + t; - uint32_t is_valid = (i + t < nvals && val_idx < s->col.leaf_column->size()) - ? s->col.leaf_column->is_valid(val_idx) - : 0; - uint32_t len; - if (is_valid) { - len = dtype_len; - if (physical_type == BYTE_ARRAY) { - switch (leaf_type) { - case type_id::STRING: { - auto str = s->col.leaf_column->element(val_idx); - len += str.size_bytes(); - } break; - case type_id::LIST: { - auto list_element = - get_element(*s->col.leaf_column, val_idx); - len += list_element.size_bytes(); - } break; - default: CUDF_UNREACHABLE("Unsupported data type for leaf column"); - } - } - } else { - len = 0; - } - - len = block_reduce(reduce_storage).Sum(len); - if (t == 0) { s->frag.fragment_data_size += len; } - __syncthreads(); - // page fragment size must fit in a 32-bit signed integer - if (s->frag.fragment_data_size > std::numeric_limits::max()) { - CUDF_UNREACHABLE("page fragment size exceeds maximum for i32"); - } - } + calculate_frag_size(s, t); __syncthreads(); if (t == 0) { frag[blockIdx.x][frag_y] = s->frag; } } } +// blockDim {512,1,1} +template +__global__ void __launch_bounds__(block_size) + gpuCalculatePageFragments(device_span frag, + device_span column_frag_sizes) +{ + __shared__ __align__(16) frag_init_state_s state_g; + + EncColumnChunk* const ck_g = frag[blockIdx.x].chunk; + frag_init_state_s* const s = &state_g; + uint32_t const t = threadIdx.x; + auto const fragment_size = column_frag_sizes[ck_g->col_desc_id]; + + if (t == 0) { s->col = *ck_g->col_desc; } + __syncthreads(); + + if (t == 0) { + int const part_end_row = ck_g->start_row + ck_g->num_rows; + s->frag.start_row = ck_g->start_row + (blockIdx.x - ck_g->first_fragment) * fragment_size; + s->frag.chunk = ck_g; + init_frag_state(s, fragment_size, part_end_row); + } + __syncthreads(); + + calculate_frag_size(s, t); + if (t == 0) { frag[blockIdx.x] = s->frag; } +} + // blockDim {128,1,1} __global__ void __launch_bounds__(128) - gpuInitFragmentStats(device_2dspan groups, - device_2dspan fragments, - device_span col_desc) + gpuInitFragmentStats(device_span groups, + device_span fragments) { - uint32_t const lane_id = threadIdx.x & WARP_MASK; - uint32_t const column_id = blockIdx.x; - uint32_t const num_fragments_per_column = 
fragments.size().second; - - uint32_t frag_id = blockIdx.y * 4 + (threadIdx.x / cudf::detail::warp_size); - while (frag_id < num_fragments_per_column) { + uint32_t const lane_id = threadIdx.x & WARP_MASK; + uint32_t const frag_id = blockIdx.x * 4 + (threadIdx.x / cudf::detail::warp_size); + if (frag_id < fragments.size()) { if (lane_id == 0) { statistics_group g; - g.col = &col_desc[column_id]; - g.start_row = fragments[column_id][frag_id].start_value_idx; - g.num_rows = fragments[column_id][frag_id].num_leaf_values; - groups[column_id][frag_id] = g; + auto* const ck_g = fragments[frag_id].chunk; + g.col = ck_g->col_desc; + g.start_row = fragments[frag_id].start_value_idx; + g.num_rows = fragments[frag_id].num_leaf_values; + groups[frag_id] = g; } - frag_id += gridDim.y * 4; } } @@ -389,7 +424,7 @@ __global__ void __launch_bounds__(128) if (num_rows >= ck_g.num_rows || (values_in_page > 0 && (page_size + fragment_data_size > this_max_page_size)) || - rows_in_page >= max_page_size_rows) { + rows_in_page + frag_g.num_rows > max_page_size_rows) { if (ck_g.use_dictionary) { // Additional byte to store entry bit width page_size = 1 + max_RLE_page_size(ck_g.dict_rle_bits, values_in_page); @@ -2057,33 +2092,35 @@ __global__ void __launch_bounds__(1) ck_g->column_index_size = static_cast(col_idx_end - ck_g->column_index_blob); } -void InitPageFragments(device_2dspan frag, - device_span col_desc, - device_span partitions, - device_span part_frag_offset, - uint32_t fragment_size, - rmm::cuda_stream_view stream) +void InitRowGroupFragments(device_2dspan frag, + device_span col_desc, + device_span partitions, + device_span part_frag_offset, + uint32_t fragment_size, + rmm::cuda_stream_view stream) { auto const num_columns = frag.size().first; auto const num_fragments_per_column = frag.size().second; auto const grid_y = std::min(static_cast(num_fragments_per_column), MAX_GRID_Y_SIZE); dim3 const dim_grid(num_columns, grid_y); // 1 threadblock per fragment - gpuInitPageFragments<512><<>>( + gpuInitRowGroupFragments<512><<>>( frag, col_desc, partitions, part_frag_offset, fragment_size); } -void InitFragmentStatistics(device_2dspan groups, - device_2dspan fragments, - device_span col_desc, +void CalculatePageFragments(device_span frag, + device_span column_frag_sizes, + rmm::cuda_stream_view stream) +{ + gpuCalculatePageFragments<512><<>>(frag, column_frag_sizes); +} + +void InitFragmentStatistics(device_span groups, + device_span fragments, rmm::cuda_stream_view stream) { - int const num_columns = col_desc.size(); - int const num_fragments_per_column = fragments.size().second; - auto const y_dim = - util::div_rounding_up_safe(num_fragments_per_column, 128 / cudf::detail::warp_size); - auto const grid_y = std::min(static_cast(y_dim), MAX_GRID_Y_SIZE); - dim3 const dim_grid(num_columns, grid_y); // 1 warp per fragment - gpuInitFragmentStats<<>>(groups, fragments, col_desc); + int const num_fragments = fragments.size(); + int const dim = util::div_rounding_up_safe(num_fragments, 128 / cudf::detail::warp_size); + gpuInitFragmentStats<<>>(groups, fragments); } void InitEncoderPages(device_2dspan chunks, diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 9b156745e41..c91f182c4f4 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -480,8 +480,9 @@ void DecodePageData(hostdevice_vector& pages, rmm::cuda_stream_view stream); /** - * @brief Launches kernel for initializing encoder page fragments + * @brief Launches kernel for 
initializing encoder row group fragments * + * These fragments are used to calculate row group boundaries. * Based on the number of rows in each fragment, populates the value count, the size of data in the * fragment, the number of unique values, and the data size of unique values. * @@ -492,24 +493,38 @@ void DecodePageData(hostdevice_vector& pages, * @param[in] fragment_size Number of rows per fragment * @param[in] stream CUDA stream to use */ -void InitPageFragments(cudf::detail::device_2dspan frag, - device_span col_desc, - device_span partitions, - device_span first_frag_in_part, - uint32_t fragment_size, - rmm::cuda_stream_view stream); +void InitRowGroupFragments(cudf::detail::device_2dspan frag, + device_span col_desc, + device_span partitions, + device_span first_frag_in_part, + uint32_t fragment_size, + rmm::cuda_stream_view stream); + +/** + * @brief Launches kernel for calculating encoder page fragments with variable fragment sizes + * + * Based on the number of rows in each fragment, populates the value count, the size of data in the + * fragment, the number of unique values, and the data size of unique values. + * + * This assumes an initial call to InitRowGroupFragments has been made. + * + * @param[out] frag Fragment array [fragment_id] + * @param[in] column_frag_sizes Number of rows per fragment per column [column_id] + * @param[in] stream CUDA stream to use + */ +void CalculatePageFragments(device_span frag, + device_span column_frag_sizes, + rmm::cuda_stream_view stream); /** - * @brief Launches kernel for initializing fragment statistics groups + * @brief Launches kernel for initializing fragment statistics groups with variable fragment sizes * - * @param[out] groups Statistics groups [num_columns x num_fragments] - * @param[in] fragments Page fragments [num_columns x num_fragments] - * @param[in] col_desc Column description [num_columns] + * @param[out] groups Statistics groups [total_fragments] + * @param[in] fragments Page fragments [total_fragments] * @param[in] stream CUDA stream to use */ -void InitFragmentStatistics(cudf::detail::device_2dspan groups, - cudf::detail::device_2dspan fragments, - device_span col_desc, +void InitFragmentStatistics(device_span groups, + device_span fragments, rmm::cuda_stream_view stream); /** diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 13ec2d652a6..88176ee1901 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #include #include @@ -85,6 +87,44 @@ parquet::Compression to_parquet_compression(compression_type compression) } } +size_type column_size(column_view const& column, rmm::cuda_stream_view stream) +{ + if (column.size() == 0) { return 0; } + + if (is_fixed_width(column.type())) { + return size_of(column.type()) * column.size(); + } else if (column.type().id() == type_id::STRING) { + auto const scol = strings_column_view(column); + return cudf::detail::get_value(scol.offsets(), column.size(), stream) - + cudf::detail::get_value(scol.offsets(), 0, stream); + } else if (column.type().id() == type_id::STRUCT) { + auto const scol = structs_column_view(column); + size_type ret = 0; + for (int i = 0; i < scol.num_children(); i++) { + ret += column_size(scol.get_sliced_child(i), stream); + } + return ret; + } else if (column.type().id() == type_id::LIST) { + auto const lcol = lists_column_view(column); + return 
column_size(lcol.get_sliced_child(stream), stream); + } + + CUDF_FAIL("Unexpected compound type"); +} + +// checks to see if the given column has a fixed size. This doesn't +// check every row, so assumes string and list columns are not fixed, even +// if each row is the same width. +// TODO: update this if FIXED_LEN_BYTE_ARRAY is ever supported for writes. +bool is_col_fixed_width(column_view const& column) +{ + if (column.type().id() == type_id::STRUCT) { + return std::all_of(column.child_begin(), column.child_end(), is_col_fixed_width); + } + + return is_fixed_width(column.type()); +} + } // namespace struct aggregate_writer_metadata { @@ -886,34 +926,33 @@ gpu::parquet_column_device_view parquet_column_view::get_device_view( return desc; } -void writer::impl::init_page_fragments(cudf::detail::hostdevice_2dvector& frag, - device_span col_desc, - host_span partitions, - device_span part_frag_offset, - uint32_t fragment_size) +void writer::impl::init_row_group_fragments( + cudf::detail::hostdevice_2dvector& frag, + device_span col_desc, + host_span partitions, + device_span part_frag_offset, + uint32_t fragment_size) { auto d_partitions = cudf::detail::make_device_uvector_async(partitions, stream); - gpu::InitPageFragments(frag, col_desc, d_partitions, part_frag_offset, fragment_size, stream); + gpu::InitRowGroupFragments(frag, col_desc, d_partitions, part_frag_offset, fragment_size, stream); frag.device_to_host(stream, true); } -void writer::impl::gather_fragment_statistics( - device_2dspan frag_stats_chunk, - device_2dspan frag, - device_span col_desc, - uint32_t num_fragments) +void writer::impl::calculate_page_fragments(device_span frag, + host_span frag_sizes) { - auto num_columns = col_desc.size(); - rmm::device_uvector frag_stats_group(num_fragments * num_columns, stream); - auto frag_stats_group_2dview = - device_2dspan(frag_stats_group.data(), num_columns, num_fragments); - - gpu::InitFragmentStatistics(frag_stats_group_2dview, frag, col_desc, stream); - detail::calculate_group_statistics(frag_stats_chunk.data(), - frag_stats_group.data(), - num_fragments * num_columns, - stream, - int96_timestamps); + auto d_frag_sz = cudf::detail::make_device_uvector_async(frag_sizes, stream); + gpu::CalculatePageFragments(frag, d_frag_sz, stream); +} + +void writer::impl::gather_fragment_statistics(device_span frag_stats, + device_span frags) +{ + rmm::device_uvector frag_stats_group(frag_stats.size(), stream); + + gpu::InitFragmentStatistics(frag_stats_group, frags, stream); + detail::calculate_group_statistics( + frag_stats.data(), frag_stats_group.data(), frag_stats.size(), stream, int96_timestamps); stream.synchronize(); } @@ -1407,23 +1446,63 @@ void writer::impl::write(table_view const& table, std::vector co }); // Init page fragments - // 5000 is good enough for up to ~200-character strings. Longer strings will start producing - // fragments larger than the desired page size -> TODO: keep track of the max fragment size, and - // iteratively reduce this value if the largest fragment exceeds the max page size limit (we - // ideally want the page size to be below 1MB so as to have enough pages to get good - // compression/decompression performance). - // If using the default fragment size, scale it up or down depending on the requested page size. 
- if (max_page_fragment_size_ == cudf::io::default_max_page_fragment_size) { - max_page_fragment_size_ = (cudf::io::default_max_page_fragment_size * max_page_size_bytes) / - cudf::io::default_max_page_size_bytes; + // 5000 is good enough for up to ~200-character strings. Longer strings and deeply nested columns + // will start producing fragments larger than the desired page size, so calculate fragment sizes + // for each leaf column. Skip if the fragment size is not the default. + auto max_page_fragment_size = max_page_fragment_size_.value_or(default_max_page_fragment_size); + + std::vector column_frag_size(num_columns, max_page_fragment_size); + + if (table.num_rows() > 0 && not max_page_fragment_size_.has_value()) { + std::vector column_sizes; + std::transform(single_streams_table.begin(), + single_streams_table.end(), + std::back_inserter(column_sizes), + [this](auto const& column) { return column_size(column, stream); }); + + // adjust global fragment size if a single fragment will overrun a rowgroup + auto const table_size = std::reduce(column_sizes.begin(), column_sizes.end()); + auto const avg_row_len = util::div_rounding_up_safe(table_size, table.num_rows()); + if (avg_row_len > 0) { + auto const rg_frag_size = util::div_rounding_up_safe(max_row_group_size, avg_row_len); + max_page_fragment_size = std::min(rg_frag_size, max_page_fragment_size); + } + + // dividing page size by average row length will tend to overshoot the desired + // page size when there's high variability in the row lengths. instead, shoot + // for multiple fragments per page to smooth things out. using 2 was too + // unbalanced in final page sizes, so using 4 which seems to be a good + // compromise at smoothing things out without getting fragment sizes too small. + auto frag_size_fn = [&](auto const& col, size_type col_size) { + const int target_frags_per_page = is_col_fixed_width(col) ? 1 : 4; + auto const avg_len = + target_frags_per_page * util::div_rounding_up_safe(col_size, table.num_rows()); + if (avg_len > 0) { + auto const frag_size = util::div_rounding_up_safe(max_page_size_bytes, avg_len); + return std::min(max_page_fragment_size, frag_size); + } else { + return max_page_fragment_size; + } + }; + + std::transform(single_streams_table.begin(), + single_streams_table.end(), + column_sizes.begin(), + column_frag_size.begin(), + frag_size_fn); } + // Fragments are calculated in two passes. In the first pass, a uniform number of fragments + // per column is used. This is done to satisfy the requirement that each column chunk within + // a row group has the same number of rows. After the row group (and thus column chunk) + // boundaries are known, a second pass is done to calculate fragments to be used in determining + // page boundaries within each column chunk. 
std::vector num_frag_in_part; std::transform(partitions.begin(), partitions.end(), std::back_inserter(num_frag_in_part), - [this](auto const& part) { - return util::div_rounding_up_unsafe(part.num_rows, max_page_fragment_size_); + [this, max_page_fragment_size](auto const& part) { + return util::div_rounding_up_unsafe(part.num_rows, max_page_fragment_size); }); size_type num_fragments = std::reduce(num_frag_in_part.begin(), num_frag_in_part.end()); @@ -1434,7 +1513,7 @@ void writer::impl::write(table_view const& table, std::vector co part_frag_offset.push_back(part_frag_offset.back() + num_frag_in_part.back()); auto d_part_frag_offset = cudf::detail::make_device_uvector_async(part_frag_offset, stream); - cudf::detail::hostdevice_2dvector fragments( + cudf::detail::hostdevice_2dvector row_group_fragments( num_columns, num_fragments, stream); if (num_fragments != 0) { @@ -1443,8 +1522,8 @@ void writer::impl::write(table_view const& table, std::vector co leaf_column_views = create_leaf_column_device_views( col_desc, *parent_column_table_device_view, stream); - init_page_fragments( - fragments, col_desc, partitions, d_part_frag_offset, max_page_fragment_size_); + init_row_group_fragments( + row_group_fragments, col_desc, partitions, d_part_frag_offset, max_page_fragment_size); } std::vector const global_rowgroup_base = md->num_row_groups_per_file(); @@ -1461,9 +1540,9 @@ void writer::impl::write(table_view const& table, std::vector co for (auto f = first_frag_in_rg; f <= last_frag_in_part; ++f) { size_t fragment_data_size = 0; for (auto c = 0; c < num_columns; c++) { - fragment_data_size += fragments[c][f].fragment_data_size; + fragment_data_size += row_group_fragments[c][f].fragment_data_size; } - size_type fragment_num_rows = fragments[0][f].num_rows; + size_type fragment_num_rows = row_group_fragments[0][f].num_rows; // If the fragment size gets larger than rg limit then break off a rg if (f > first_frag_in_rg && // There has to be at least one fragment in row group @@ -1490,17 +1569,6 @@ void writer::impl::write(table_view const& table, std::vector co } } - // Allocate column chunks and gather fragment statistics - rmm::device_uvector frag_stats(0, stream); - if (stats_granularity_ != statistics_freq::STATISTICS_NONE) { - frag_stats.resize(num_fragments * num_columns, stream); - if (not frag_stats.is_empty()) { - auto frag_stats_2dview = - device_2dspan(frag_stats.data(), num_columns, num_fragments); - gather_fragment_statistics(frag_stats_2dview, fragments, col_desc, num_fragments); - } - } - std::vector first_rg_in_part; std::exclusive_scan( num_rg_in_part.begin(), num_rg_in_part.end(), std::back_inserter(first_rg_in_part), 0); @@ -1509,6 +1577,9 @@ void writer::impl::write(table_view const& table, std::vector co auto const num_chunks = num_rowgroups * num_columns; hostdevice_2dvector chunks(num_rowgroups, num_columns, stream); + // total fragments per column (in case they are non-uniform) + std::vector frags_per_column(num_columns, 0); + for (size_t p = 0; p < partitions.size(); ++p) { int f = part_frag_offset[p]; size_type start_row = partitions[p].start_row; @@ -1516,22 +1587,21 @@ void writer::impl::write(table_view const& table, std::vector co size_t global_r = global_rowgroup_base[p] + r; // Number of rowgroups already in file/part auto& row_group = md->file(p).row_groups[global_r]; uint32_t fragments_in_chunk = - util::div_rounding_up_unsafe(row_group.num_rows, max_page_fragment_size_); + util::div_rounding_up_unsafe(row_group.num_rows, max_page_fragment_size); 
row_group.total_byte_size = 0; row_group.columns.resize(num_columns); for (int c = 0; c < num_columns; c++) { gpu::EncColumnChunk& ck = chunks[r + first_rg_in_part[p]][c]; - ck = {}; - ck.col_desc = col_desc.device_ptr() + c; - ck.col_desc_id = c; - ck.fragments = &fragments.device_view()[c][f]; - ck.stats = - (not frag_stats.is_empty()) ? frag_stats.data() + c * num_fragments + f : nullptr; + ck = {}; + ck.col_desc = col_desc.device_ptr() + c; + ck.col_desc_id = c; + ck.fragments = &row_group_fragments.device_view()[c][f]; + ck.stats = nullptr; ck.start_row = start_row; ck.num_rows = (uint32_t)row_group.num_rows; ck.first_fragment = c * num_fragments + f; - auto chunk_fragments = fragments[c].subspan(f, fragments_in_chunk); + auto chunk_fragments = row_group_fragments[c].subspan(f, fragments_in_chunk); // In fragment struct, add a pointer to the chunk it belongs to // In each fragment in chunk_fragments, update the chunk pointer here. for (auto& frag : chunk_fragments) { @@ -1551,15 +1621,23 @@ void writer::impl::write(table_view const& table, std::vector co column_chunk_meta.path_in_schema = parquet_columns[c].get_path_in_schema(); column_chunk_meta.codec = UNCOMPRESSED; column_chunk_meta.num_values = ck.num_values; + + frags_per_column[c] += util::div_rounding_up_unsafe( + row_group.num_rows, std::min(column_frag_size[c], max_page_fragment_size)); } f += fragments_in_chunk; start_row += (uint32_t)row_group.num_rows; } } - fragments.host_to_device(stream); - auto dict_info_owner = build_chunk_dictionaries( - chunks, col_desc, fragments, compression_, dict_policy_, max_dictionary_size_, stream); + row_group_fragments.host_to_device(stream); + auto dict_info_owner = build_chunk_dictionaries(chunks, + col_desc, + row_group_fragments, + compression_, + dict_policy_, + max_dictionary_size_, + stream); for (size_t p = 0; p < partitions.size(); p++) { for (int rg = 0; rg < num_rg_in_part[p]; rg++) { size_t global_rg = global_rowgroup_base[p] + rg; @@ -1572,7 +1650,72 @@ void writer::impl::write(table_view const& table, std::vector co } } - // Build chunk dictionaries and count pages + // The code preceding this used a uniform fragment size for all columns. Now recompute + // fragments with a (potentially) varying number of fragments per column. 
+
+  // first figure out the total number of fragments and calculate the start offset for each column
+  std::vector<size_type> frag_offsets;
+  size_type const total_frags = [&]() {
+    if (frags_per_column.size() > 0) {
+      std::exclusive_scan(frags_per_column.data(),
+                          frags_per_column.data() + num_columns + 1,
+                          std::back_inserter(frag_offsets),
+                          0);
+      return frag_offsets[num_columns];
+    } else {
+      return 0;
+    }
+  }();
+
+  rmm::device_uvector<statistics_chunk> frag_stats(0, stream);
+  hostdevice_vector<gpu::PageFragment> page_fragments(total_frags, stream);
+
+  // update fragments and/or prepare for fragment statistics calculation if necessary
+  if (total_frags != 0) {
+    if (stats_granularity_ != statistics_freq::STATISTICS_NONE) {
+      frag_stats.resize(total_frags, stream);
+    }
+
+    for (int c = 0; c < num_columns; c++) {
+      auto frag_offset     = frag_offsets[c];
+      auto const frag_size = column_frag_size[c];
+
+      for (size_t p = 0; p < partitions.size(); ++p) {
+        for (int r = 0; r < num_rg_in_part[p]; r++) {
+          auto const global_r   = global_rowgroup_base[p] + r;
+          auto const& row_group = md->file(p).row_groups[global_r];
+          uint32_t const fragments_in_chunk =
+            util::div_rounding_up_unsafe(row_group.num_rows, frag_size);
+          gpu::EncColumnChunk& ck = chunks[r + first_rg_in_part[p]][c];
+          ck.fragments            = page_fragments.device_ptr(frag_offset);
+          ck.first_fragment       = frag_offset;
+
+          // update the chunk pointer here for each fragment in chunk.fragments
+          for (uint32_t i = 0; i < fragments_in_chunk; i++) {
+            page_fragments[frag_offset + i].chunk =
+              &chunks.device_view()[r + first_rg_in_part[p]][c];
+          }
+
+          if (not frag_stats.is_empty()) { ck.stats = frag_stats.data() + frag_offset; }
+          frag_offset += fragments_in_chunk;
+        }
+      }
+    }
+
+    chunks.host_to_device(stream);
+
+    // re-initialize page fragments
+    page_fragments.host_to_device(stream);
+    calculate_page_fragments(page_fragments, column_frag_size);
+
+    // and gather fragment statistics
+    if (not frag_stats.is_empty()) {
+      gather_fragment_statistics(frag_stats,
+                                 {page_fragments.device_ptr(), static_cast<size_t>(total_frags)});
+    }
+  }
+
+  // Build chunk dictionaries and count pages. Sends chunks to device.
   hostdevice_vector<size_type> comp_page_sizes = init_page_sizes(
     chunks, col_desc, num_columns, max_page_size_bytes, max_page_size_rows, compression_, stream);

diff --git a/cpp/src/io/parquet/writer_impl.hpp b/cpp/src/io/parquet/writer_impl.hpp
index 3569281fb47..24c35455ff7 100644
--- a/cpp/src/io/parquet/writer_impl.hpp
+++ b/cpp/src/io/parquet/writer_impl.hpp
@@ -122,32 +122,42 @@ class writer::impl {
  private:
   /**
-   * @brief Gather page fragments
+   * @brief Gather row group fragments
    *
-   * @param frag Destination page fragments
+   * This calculates fragments to be used in determining row group boundaries.
+   *
+   * @param frag Destination row group fragments
    * @param col_desc column description array
    * @param[in] partitions Information about partitioning of table
    * @param[in] part_frag_offset A Partition's offset into fragment array
    * @param fragment_size Number of rows per fragment
    */
-  void init_page_fragments(hostdevice_2dvector<gpu::PageFragment>& frag,
-                           device_span<gpu::parquet_column_device_view const> col_desc,
-                           host_span<partition_info const> partitions,
-                           device_span<int const> part_frag_offset,
-                           uint32_t fragment_size);
+  void init_row_group_fragments(hostdevice_2dvector<gpu::PageFragment>& frag,
+                                device_span<gpu::parquet_column_device_view const> col_desc,
+                                host_span<partition_info const> partitions,
+                                device_span<int const> part_frag_offset,
+                                uint32_t fragment_size);
+
+  /**
+   * @brief Recalculate page fragments
+   *
+   * This calculates fragments to be used to determine page boundaries within
+   * column chunks.
+ * + * @param frag Destination page fragments + * @param frag_sizes Array of fragment sizes for each column + */ + void calculate_page_fragments(device_span frag, + host_span frag_sizes); /** * @brief Gather per-fragment statistics * - * @param dst_stats output statistics - * @param frag Input page fragments - * @param col_desc column description array - * @param num_fragments Total number of fragments per column + * @param frag_stats output statistics + * @param frags Input page fragments */ - void gather_fragment_statistics(device_2dspan dst_stats, - device_2dspan frag, - device_span col_desc, - uint32_t num_fragments); + void gather_fragment_statistics(device_span frag_stats, + device_span frags); /** * @brief Initialize encoder pages @@ -220,9 +230,9 @@ class writer::impl { statistics_freq stats_granularity_ = statistics_freq::STATISTICS_NONE; dictionary_policy dict_policy_ = dictionary_policy::ALWAYS; size_t max_dictionary_size_ = default_max_dictionary_size; - size_type max_page_fragment_size_ = default_max_page_fragment_size; bool int96_timestamps = false; int32_t column_index_truncate_length = default_column_index_truncate_length; + std::optional max_page_fragment_size_; // Overall file metadata. Filled in during the process and written during write_chunked_end() std::unique_ptr md; // File footer key-value metadata. Written during write_chunked_end() diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 0cecf62cc9d..ed95d8381d9 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -93,6 +93,7 @@ auto write_file(std::vector>& input_columns, cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *input_table) .max_page_size_bytes(max_page_size_bytes) .max_page_size_rows(max_page_size_rows) + .max_page_fragment_size(cudf::io::default_max_page_fragment_size) .build(); cudf::io::write_parquet(write_opts); diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 48f69e3ecd3..141c06733a6 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -3705,6 +3705,42 @@ TEST_F(ParquetWriterTest, CheckPageRows) EXPECT_EQ(ph.data_page_header.num_values, page_rows); } +TEST_F(ParquetWriterTest, CheckPageRowsAdjusted) +{ + // enough for a few pages with the default 20'000 rows/page + constexpr auto rows_per_page = 20'000; + constexpr auto num_rows = 3 * rows_per_page; + const std::string s1(32, 'a'); + auto col0_elements = + cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return s1; }); + auto col0 = cudf::test::strings_column_wrapper(col0_elements, col0_elements + num_rows); + + auto const expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("CheckPageRowsAdjusted.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .max_page_size_rows(rows_per_page); + cudf::io::write_parquet(out_opts); + + // check first page header and make sure it has only page_rows values + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + CUDF_EXPECTS(fmd.row_groups.size() > 0, "No row groups found"); + CUDF_EXPECTS(fmd.row_groups[0].columns.size() == 1, "Invalid number of columns"); + auto const& first_chunk = fmd.row_groups[0].columns[0].meta_data; + CUDF_EXPECTS(first_chunk.data_page_offset > 0, "Invalid location for first data page"); + + // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded + // version should be smaller than size of the struct. + auto const ph = read_page_header( + source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::PageHeader), 0}); + + EXPECT_LE(ph.data_page_header.num_values, rows_per_page); +} + TEST_F(ParquetWriterTest, Decimal128Stats) { // check that decimal128 min and max statistics are written in network byte order @@ -4046,14 +4082,14 @@ int32_t compare_binary(const std::vector& v1, TEST_F(ParquetWriterTest, LargeColumnIndex) { // create a file large enough to be written in 2 batches (currently 1GB per batch) + // pick fragment size that num_rows is divisible by, so we'll get equal sized row groups const std::string s1(1000, 'a'); const std::string s2(1000, 'b'); - constexpr auto num_rows = 512 * 1024; + constexpr auto num_rows = 512 * 1024; + constexpr auto frag_size = num_rows / 128; - // TODO(ets) need dictionary_policy set to NEVER from #12211. Then - // we don't need to append a number to make the strings unique. auto col0_elements = cudf::detail::make_counting_transform_iterator( - 0, [&](auto i) { return ((i < num_rows) ? s1 : s2) + std::to_string(i); }); + 0, [&](auto i) { return (i < num_rows) ? 
s1 : s2; }); auto col0 = cudf::test::strings_column_wrapper(col0_elements, col0_elements + 2 * num_rows); auto const expected = table_view{{col0, col0}}; @@ -4063,6 +4099,8 @@ TEST_F(ParquetWriterTest, LargeColumnIndex) cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) .compression(cudf::io::compression_type::NONE) + .dictionary_policy(cudf::io::dictionary_policy::NEVER) + .max_page_fragment_size(frag_size) .row_group_size_bytes(1024 * 1024 * 1024) .row_group_size_rows(num_rows); cudf::io::write_parquet(out_opts); From a96b1508cf96207eef5e26b330d94d20f6bc5054 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 22 Feb 2023 17:12:37 -0800 Subject: [PATCH 03/60] Stop using versioneer to manage versions (#12741) This PR replaces usage of versioneer with hard-coded version numbers in setup.py and __init__.py. Since cudf needs to manage versions across a wide range of file types (CMake, C++, Sphinx and doxygen docs, etc), versioneer cannot be relied on as a single source of truth and therefore does not allow us to single-source our versioning to the Git repo as is intended. Additionally, since the primary means of installing cudf is via conda packages (or now, pip packages), information from the package manager tends to be far more informative than the version strings for troubleshooting and debugging purposes. Conversely, the nonstandard version strings that it produces tend to be problematic for other tools, which at best will ignore such versions but at worst will simply fail. This PR also replaces usage of an environment variable to set the package name for wheels in setup.py, instead moving the renaming logic into the same sed script used to update package versions. This change makes setup.py essentially static, paving the way for migration to pyproject.toml. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/12741 --- .gitattributes | 4 - .github/workflows/build.yaml | 2 + .github/workflows/pr.yaml | 2 + .pre-commit-config.yaml | 3 +- ci/checks/copyright.py | 3 +- ci/release/apply_wheel_modifications.sh | 33 + ci/release/update-version.sh | 27 +- python/cudf/_custom_build/backend.py | 37 - python/cudf/cudf/__init__.py | 4 +- python/cudf/cudf/_version.py | 566 ------- python/cudf/pyproject.toml | 6 +- python/cudf/setup.cfg | 12 - python/cudf/setup.py | 28 +- python/cudf/versioneer.py | 1904 ---------------------- python/cudf_kafka/cudf_kafka/_version.py | 566 ------- python/cudf_kafka/setup.cfg | 10 +- python/cudf_kafka/setup.py | 4 +- python/cudf_kafka/versioneer.py | 1904 ---------------------- python/custreamz/custreamz/_version.py | 566 ------- python/custreamz/setup.cfg | 10 +- python/custreamz/setup.py | 4 +- python/custreamz/versioneer.py | 1904 ---------------------- python/dask_cudf/dask_cudf/__init__.py | 6 +- python/dask_cudf/dask_cudf/_version.py | 566 ------- python/dask_cudf/setup.cfg | 10 +- python/dask_cudf/setup.py | 24 +- python/dask_cudf/versioneer.py | 1904 ---------------------- setup.cfg | 7 +- 28 files changed, 79 insertions(+), 10037 deletions(-) delete mode 100644 .gitattributes create mode 100755 ci/release/apply_wheel_modifications.sh delete mode 100644 python/cudf/_custom_build/backend.py delete mode 100644 python/cudf/cudf/_version.py delete mode 100644 python/cudf/versioneer.py delete mode 100644 python/cudf_kafka/cudf_kafka/_version.py delete mode 100644 python/cudf_kafka/versioneer.py delete mode 100644 python/custreamz/custreamz/_version.py delete mode 100644 python/custreamz/versioneer.py delete mode 100644 python/dask_cudf/dask_cudf/_version.py delete mode 100644 python/dask_cudf/versioneer.py diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index fbfe7434d50..00000000000 --- a/.gitattributes +++ /dev/null @@ -1,4 +0,0 @@ -python/cudf/cudf/_version.py export-subst -python/cudf_kafka/cudf_kafka/_version.py export-subst -python/custreamz/custreamz/_version.py export-subst -python/dask_cudf/dask_cudf/_version.py export-subst diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 26d07515f70..fa6704ef04e 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -64,6 +64,7 @@ jobs: package-name: cudf package-dir: python/cudf skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" + uses-setup-env-vars: false wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit @@ -85,6 +86,7 @@ jobs: date: ${{ inputs.date }} package-name: dask_cudf package-dir: python/dask_cudf + uses-setup-env-vars: false wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index f33fc15c52f..73df2de20c2 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -91,6 +91,7 @@ jobs: package-name: cudf package-dir: python/cudf skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" + uses-setup-env-vars: false wheel-tests-cudf: needs: wheel-build-cudf secrets: inherit @@ -112,6 +113,7 @@ jobs: package-name: dask_cudf package-dir: python/dask_cudf before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && pip install 
--no-deps ./local-cudf/cudf*.whl" + uses-setup-env-vars: false wheel-tests-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a13b4ca10f1..244fc0d3872 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -144,8 +144,7 @@ repos: exclude: | (?x)^( .*test.*| - ^CHANGELOG.md$| - ^.*versioneer.py$ + ^CHANGELOG.md$ ) default_language_version: diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py index 0f2540c440c..e76d9524c76 100644 --- a/ci/checks/copyright.py +++ b/ci/checks/copyright.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -31,7 +31,6 @@ ] ExemptFiles = [ re.compile(r"cpp/include/cudf_test/cxxopts.hpp"), - re.compile(r"versioneer[.]py"), ] # this will break starting at year 10000, which is probably OK :) diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh new file mode 100755 index 00000000000..e017b24be6e --- /dev/null +++ b/ci/release/apply_wheel_modifications.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Usage: bash apply_wheel_modifications.sh + +VERSION=${1} +CUDA_SUFFIX=${2} + +# __init__.py versions +sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf/cudf/__init__.py +sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/dask_cudf/dask_cudf/__init__.py +sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf_kafka/cudf_kafka/__init__.py +sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/custreamz/custreamz/__init__.py + +# setup.py versions +sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/cudf/setup.py +sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/dask_cudf/setup.py +sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/cudf_kafka/setup.py +sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/custreamz/setup.py + +# cudf setup.py cuda suffixes +sed -i "s/name=\"cudf\"/name=\"cudf${CUDA_SUFFIX}\"/g" python/cudf/setup.py +sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/cudf/setup.py +sed -i "s/ptxcompiler/ptxcompiler${CUDA_SUFFIX}/g" python/cudf/setup.py +sed -i "s/cubinlinker/cubinlinker${CUDA_SUFFIX}/g" python/cudf/setup.py + +# cudf pyproject.toml cuda suffixes +sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/cudf/pyproject.toml + +# dask_cudf setup.py cuda suffixes +sed -i "s/name=\"dask-cudf\"/name=\"dask-cudf${CUDA_SUFFIX}\"/g" python/dask_cudf/setup.py +# Need to provide the == to avoid modifying the URL +sed -i "s/\"cudf==/\"cudf${CUDA_SUFFIX}==/g" python/dask_cudf/setup.py diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 15d81127450..c8875fda641 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -34,19 +34,27 @@ function sed_runner() { # cpp update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/CMakeLists.txt -# cpp stream testing update -sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/tests/utilities/identify_stream_usage/CMakeLists.txt - -# Python update +# Python CMakeLists updates sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt - # cpp libcudf_kafka update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' 
cpp/libcudf_kafka/CMakeLists.txt # cpp cudf_jni update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' java/src/main/native/CMakeLists.txt +# Python __init__.py updates +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf/cudf/__init__.py +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/dask_cudf/dask_cudf/__init__.py +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/cudf_kafka/__init__.py +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/custreamz/custreamz/__init__.py + +# Python setup.py updates +sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/cudf/setup.py +sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/dask_cudf/setup.py +sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/cudf_kafka/setup.py +sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/custreamz/setup.py + # rapids-cmake version sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake @@ -81,9 +89,12 @@ sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_ # Need to distutils-normalize the original version NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") -# Wheel builds install intra-RAPIDS dependencies from same release -sed_runner "s/rmm{cuda_suffix}.*\",/rmm{cuda_suffix}==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/setup.py -sed_runner "s/cudf{cuda_suffix}==.*\",/cudf{cuda_suffix}==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/setup.py +# Dependency versions in setup.py +sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/setup.py +sed_runner "s/cudf==.*\",/cudf==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/setup.py + +# Dependency versions in pyproject.toml +sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/pyproject.toml for FILE in .github/workflows/*.yaml; do sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" diff --git a/python/cudf/_custom_build/backend.py b/python/cudf/_custom_build/backend.py deleted file mode 100644 index 37b7edf2432..00000000000 --- a/python/cudf/_custom_build/backend.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. - -"""Custom build backend for cudf to get versioned requirements. 
- -Based on https://setuptools.pypa.io/en/latest/build_meta.html -""" -import os -from functools import wraps - -from setuptools import build_meta as _orig - -# Alias the required bits -build_wheel = _orig.build_wheel -build_sdist = _orig.build_sdist - - -def replace_requirements(func): - @wraps(func) - def wrapper(config_settings=None): - orig_list = getattr(_orig, func.__name__)(config_settings) - append_list = [ - f"rmm{os.getenv('RAPIDS_PY_WHEEL_CUDA_SUFFIX', default='')}" - ] - return orig_list + append_list - - return wrapper - - -get_requires_for_build_wheel = replace_requirements( - _orig.get_requires_for_build_wheel -) -get_requires_for_build_sdist = replace_requirements( - _orig.get_requires_for_build_sdist -) -get_requires_for_build_editable = replace_requirements( - _orig.get_requires_for_build_editable -) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 05f61ee4f5a..04b64e18594 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -10,7 +10,6 @@ import rmm from cudf import api, core, datasets, testing -from cudf._version import get_versions from cudf.api.extensions import ( register_dataframe_accessor, register_index_accessor, @@ -112,8 +111,7 @@ rmm.register_reinitialize_hook(clear_cache) -__version__ = get_versions()["version"] -del get_versions +__version__ = "23.04.00" __all__ = [ "BaseIndex", diff --git a/python/cudf/cudf/_version.py b/python/cudf/cudf/_version.py deleted file mode 100644 index 60a2afed39b..00000000000 --- a/python/cudf/cudf/_version.py +++ /dev/null @@ -1,566 +0,0 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "v" - cfg.parentdir_prefix = "cudf-" - cfg.versionfile_source = "cudf/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command( - commands, args, cwd=None, verbose=False, hide_stderr=False, env=None -): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print(f"unable to find command, tried {commands}") - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs) - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r"\d", r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command( - GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True - ) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ( - "unable to parse git-describe output: '%s'" % describe_out - ) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces[ - "error" - ] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command( - GITS, ["rev-list", "HEAD", "--count"], cwd=root - ) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords( - get_keywords(), cfg.tag_prefix, verbose - ) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split("/"): - root = os.path.dirname(root) - except NameError: - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None, - } - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 57464e83282..49c4d83245f 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -1,7 +1,7 @@ # Copyright (c) 2021-2023, NVIDIA CORPORATION. [build-system] - +build-backend = "setuptools.build_meta" requires = [ "wheel", "setuptools", @@ -13,7 +13,5 @@ requires = [ # Hard pin the patch version used during the build. "pyarrow==10.0.1", "protoc-wheel", - "versioneer", + "rmm==23.4.*", ] -build-backend = "backend" -backend-path = ["_custom_build"] diff --git a/python/cudf/setup.cfg b/python/cudf/setup.cfg index 59dd5d0179e..8380da371f9 100644 --- a/python/cudf/setup.cfg +++ b/python/cudf/setup.cfg @@ -1,17 +1,5 @@ # Copyright (c) 2018-2023, NVIDIA CORPORATION. -# See the docstring in versioneer.py for instructions. 
Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. - -[versioneer] -VCS = git -style = pep440 -versionfile_source = cudf/_version.py -versionfile_build = cudf/_version.py -tag_prefix = v -parentdir_prefix = cudf- - [isort] line_length=79 multi_line_output=3 diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 88bc2cfae28..0150c4fe715 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -1,13 +1,8 @@ # Copyright (c) 2018-2023, NVIDIA CORPORATION. -import os - -import versioneer from setuptools import find_packages from skbuild import setup -cuda_suffix = os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default="") - install_requires = [ "cachetools", "cuda-python>=11.7.1,<12.0", @@ -21,9 +16,9 @@ "typing_extensions", # Allow floating minor versions for Arrow. "pyarrow==10", - f"rmm{cuda_suffix}==23.4.*", - f"ptxcompiler{cuda_suffix}", - f"cubinlinker{cuda_suffix}", + "rmm==23.4.*", + "ptxcompiler", + "cubinlinker", "cupy-cuda11x", ] @@ -43,21 +38,9 @@ ] } -if "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE" in os.environ: - orig_get_versions = versioneer.get_versions - - version_override = os.environ["RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE"] - - def get_versions(): - data = orig_get_versions() - data["version"] = version_override - return data - - versioneer.get_versions = get_versions - setup( - name=f"cudf{cuda_suffix}", - version=versioneer.get_version(), + name="cudf", + version="23.04.00", description="cuDF - GPU Dataframe", url="https://github.com/rapidsai/cudf", author="NVIDIA Corporation", @@ -72,7 +55,6 @@ def get_versions(): "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ], - cmdclass=versioneer.get_cmdclass(), include_package_data=True, packages=find_packages(include=["cudf", "cudf.*"]), package_data={ diff --git a/python/cudf/versioneer.py b/python/cudf/versioneer.py deleted file mode 100644 index a6537a34ede..00000000000 --- a/python/cudf/versioneer.py +++ /dev/null @@ -1,1904 +0,0 @@ -# Version: 0.18 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! -* https://github.com/warner/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. 
- - -## Quick Install - -* `pip install versioneer` to somewhere to your $PATH -* add a `[versioneer]` section to your setup.cfg (see below) -* run `versioneer install` in your source tree, commit the results - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. 
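For concreteness, a minimal sketch of both call sites (the package name `myproject` is a
placeholder, not part of this repository):

    # Runtime: inside myproject/__init__.py, the generated _version.py is
    # importable as a module of the package itself.
    from myproject._version import get_versions
    __version__ = get_versions()["version"]
    del get_versions

    # Build time: inside setup.py, the vendored top-level versioneer.py is
    # importable because setup.py runs from the project root.
    import versioneer
    print(versioneer.get_versions()["version"])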
- -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. - -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. - -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from cudf._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/warner/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). 
The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other langauges) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. - -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. - -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. - -[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - -### Unicode version strings - -While Versioneer works (and is continually tested) with both Python 2 and -Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. -Newer releases probably generate unicode version strings on py2. It's not -clear that this is wrong, but it may be surprising for applications when then -write these strings to a network connection or include them in bytes-oriented -APIs like cryptographic checksums. - -[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates -this question. 
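To make the default "pep440" style from the "Styles" section above concrete, here is a
minimal standalone sketch (not versioneer's own code) that renders the same `pieces`
dictionary `_version.py` builds; the sample values are made up:

    def render_pep440_sketch(pieces):
        # Produces TAG[+DISTANCE.gHEX[.dirty]], the default style described
        # in the "Styles" section above.
        rendered = pieces["closest-tag"] or "0+untagged"
        if pieces["distance"] or pieces["dirty"]:
            # Use "." if the string already carries a "+" local-version part.
            sep = "." if "+" in rendered else "+"
            rendered += "%s%d.g%s" % (sep, pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
        return rendered

    # A checkout two commits past the v0.11 tag, with uncommitted edits:
    pieces = {"closest-tag": "0.11", "distance": 2,
              "short": "1076c97", "dirty": True}
    print(render_pep440_sketch(pieces))  # -> 0.11+2.g1076c97.dirty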
- - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . - -""" - -from __future__ import print_function - -import errno -import json -import os -import re -import subprocess -import sys - -try: - import configparser -except ImportError: - import ConfigParser as configparser - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . - """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ( - "Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND')." - ) - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. 
- me = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(me)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: - print( - "Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(me), versioneer_py) - ) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise EnvironmentError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . - setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: - parser.readfp(f) - VCS = parser.get("versioneer", "VCS") # mandatory - - def get(parser, name): - if parser.has_option("versioneer", name): - return parser.get("versioneer", name) - return None - - cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = get(parser, "style") or "" - cfg.versionfile_source = get(parser, "versionfile_source") - cfg.versionfile_build = get(parser, "versionfile_build") - cfg.tag_prefix = get(parser, "tag_prefix") - if cfg.tag_prefix in ("''", '""'): - cfg.tag_prefix = "" - cfg.parentdir_prefix = get(parser, "parentdir_prefix") - cfg.verbose = get(parser, "verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command( - commands, args, cwd=None, verbose=False, hide_stderr=False, env=None -): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -LONG_VERSION_PY[ - "git" -] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. 
Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. 
We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%%s*" %% tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], - cwd=root)[0].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%%d" %% pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for i in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command( - GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True - ) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ( - "unable to parse git-describe output: '%s'" % describe_out - ) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, - ) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command( - GITS, ["rev-list", "HEAD", "--count"], cwd=root - ) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: - pass - if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.18) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. 
- -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except EnvironmentError: - raise NotThisMethod("unable to read _version.py") - mo = re.search( - r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, - re.M | re.S, - ) - if not mo: - mo = re.search( - r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, - re.M | re.S, - ) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps( - versions, sort_keys=True, indent=1, separators=(",", ": ") - ) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. - """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert ( - cfg.versionfile_source is not None - ), "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. 
-
-    get_keywords_f = handlers.get("get_keywords")
-    from_keywords_f = handlers.get("keywords")
-    if get_keywords_f and from_keywords_f:
-        try:
-            keywords = get_keywords_f(versionfile_abs)
-            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
-            if verbose:
-                print("got version from expanded keyword %s" % ver)
-            return ver
-        except NotThisMethod:
-            pass
-
-    try:
-        ver = versions_from_file(versionfile_abs)
-        if verbose:
-            print("got version from file %s %s" % (versionfile_abs, ver))
-        return ver
-    except NotThisMethod:
-        pass
-
-    from_vcs_f = handlers.get("pieces_from_vcs")
-    if from_vcs_f:
-        try:
-            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
-            ver = render(pieces, cfg.style)
-            if verbose:
-                print("got version from VCS %s" % ver)
-            return ver
-        except NotThisMethod:
-            pass
-
-    try:
-        if cfg.parentdir_prefix:
-            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
-            if verbose:
-                print("got version from parentdir %s" % ver)
-            return ver
-    except NotThisMethod:
-        pass
-
-    if verbose:
-        print("unable to compute version")
-
-    return {
-        "version": "0+unknown",
-        "full-revisionid": None,
-        "dirty": None,
-        "error": "unable to compute version",
-        "date": None,
-    }
-
-
-def get_version():
-    """Get the short version string for this project."""
-    return get_versions()["version"]
-
-
-def get_cmdclass():
-    """Get the custom setuptools/distutils subclasses used by Versioneer."""
-    if "versioneer" in sys.modules:
-        del sys.modules["versioneer"]
-        # this fixes the "python setup.py develop" case (also 'install' and
-        # 'easy_install .'), in which subdependencies of the main project are
-        # built (using setup.py bdist_egg) in the same python process. Assume
-        # a main project A and a dependency B, which use different versions
-        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
-        # sys.modules by the time B's setup.py is executed, causing B to run
-        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
-        # sandbox that restores sys.modules to its pre-build state, so the
-        # parent is protected against the child's "import versioneer". By
-        # removing ourselves from sys.modules here, before the child build
-        # happens, we protect the child from the parent's versioneer too.
-        # Also see https://github.com/warner/python-versioneer/issues/52
-
-    cmds = {}
-
-    # we add "version" to both distutils and setuptools
-    from distutils.core import Command
-
-    class cmd_version(Command):
-        description = "report generated version string"
-        user_options = []
-        boolean_options = []
-
-        def initialize_options(self):
-            pass
-
-        def finalize_options(self):
-            pass
-
-        def run(self):
-            vers = get_versions(verbose=True)
-            print("Version: %s" % vers["version"])
-            print(" full-revisionid: %s" % vers.get("full-revisionid"))
-            print(" dirty: %s" % vers.get("dirty"))
-            print(" date: %s" % vers.get("date"))
-            if vers["error"]:
-                print(" error: %s" % vers["error"])
-
-    cmds["version"] = cmd_version
-
-    # we override "build_py" in both distutils and setuptools
-    #
-    # most invocation pathways end up running build_py:
-    #  distutils/build -> build_py
-    #  distutils/install -> distutils/build ->..
-    #  setuptools/bdist_wheel -> distutils/install ->..
-    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
-    #  setuptools/install -> bdist_egg ->..
-    #  setuptools/develop -> ?
- # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? - - # we override different "build_py" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py - else: - from distutils.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join( - self.build_lib, cfg.versionfile_build - ) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - cmds["build_py"] = cmd_build_py - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if "py2exe" in sys.modules: # py2exe enabled? 
- try: - from py2exe.distutils_buildexe import py2exe as _py2exe # py3 - except ImportError: - from py2exe.build_exe import py2exe as _py2exe # py2 - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["py2exe"] = cmd_py2exe - - # we override different "sdist" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist - else: - from distutils.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file( - target_versionfile, self._versioneer_generated_versions - ) - - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. 
- -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -INIT_PY_SNIPPET = """ -from cudf._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - - -def do_setup(): - """Main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: - if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print( - "Adding sample versioneer config to setup.cfg", file=sys.stderr - ) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except EnvironmentError: - old = "" - if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print( - " appending versionfile_source ('%s') to MANIFEST.in" - % cfg.versionfile_source - ) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. 
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) diff --git a/python/cudf_kafka/cudf_kafka/_version.py b/python/cudf_kafka/cudf_kafka/_version.py deleted file mode 100644 index 3c1d113fd47..00000000000 --- a/python/cudf_kafka/cudf_kafka/_version.py +++ /dev/null @@ -1,566 +0,0 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "v" - cfg.parentdir_prefix = "cudf_kafka-" - cfg.versionfile_source = "cudf_kafka/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command( - commands, args, cwd=None, verbose=False, hide_stderr=False, env=None -): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print(f"unable to find command, tried {commands}") - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs) - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r"\d", r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command( - GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True - ) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ( - "unable to parse git-describe output: '%s'" % describe_out - ) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces[ - "error" - ] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command( - GITS, ["rev-list", "HEAD", "--count"], cwd=root - ) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords( - get_keywords(), cfg.tag_prefix, verbose - ) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split("/"): - root = os.path.dirname(root) - except NameError: - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None, - } - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } diff --git a/python/cudf_kafka/setup.cfg b/python/cudf_kafka/setup.cfg index f884e67908b..ee0d783b184 100644 --- a/python/cudf_kafka/setup.cfg +++ b/python/cudf_kafka/setup.cfg @@ -1,12 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. - -[versioneer] -VCS = git -style = pep440 -versionfile_source = cudf_kafka/_version.py -versionfile_build = cudf_kafka/_version.py -tag_prefix = v -parentdir_prefix = cudf_kafka- +# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
[isort] line_length=79 diff --git a/python/cudf_kafka/setup.py b/python/cudf_kafka/setup.py index caadfcac8aa..c39b65cdb55 100644 --- a/python/cudf_kafka/setup.py +++ b/python/cudf_kafka/setup.py @@ -6,7 +6,6 @@ import numpy as np import pyarrow as pa -import versioneer from Cython.Build import cythonize from setuptools import find_packages, setup from setuptools.extension import Extension @@ -87,7 +86,7 @@ setup( name="cudf_kafka", - version=versioneer.get_version(), + version="23.04.00", description="cuDF Kafka Datasource", url="https://github.com/rapidsai/cudf", author="NVIDIA Corporation", @@ -116,7 +115,6 @@ find_packages(include=["cudf_kafka._lib*"]), ["*.pxd"], ), - cmdclass=versioneer.get_cmdclass(), install_requires=install_requires, extras_require=extras_require, zip_safe=False, diff --git a/python/cudf_kafka/versioneer.py b/python/cudf_kafka/versioneer.py deleted file mode 100644 index dbddb6e0fd0..00000000000 --- a/python/cudf_kafka/versioneer.py +++ /dev/null @@ -1,1904 +0,0 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. -# Version: 0.18 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! -* https://github.com/warner/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -* `pip install versioneer` to somewhere to your $PATH -* add a `[versioneer]` section to your setup.cfg (see below) -* run `versioneer install` in your source tree, commit the results - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). 
Many VCS systems can report a description that captures this,
-for example `git describe --tags --dirty --always` reports things like
-"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
-0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
-uncommitted changes).
-
-The version identifier is used for multiple purposes:
-
-* to allow the module to self-identify its version: `myproject.__version__`
-* to choose a name and prefix for a 'setup.py sdist' tarball
-
-## Theory of Operation
-
-Versioneer works by adding a special `_version.py` file into your source
-tree, where your `__init__.py` can import it. This `_version.py` knows how to
-dynamically ask the VCS tool for version information at import time.
-
-`_version.py` also contains `$Revision$` markers, and the installation
-process marks `_version.py` to have this marker rewritten with a tag name
-during the `git archive` command. As a result, generated tarballs will
-contain enough information to get the proper version.
-
-To allow `setup.py` to compute a version too, a `versioneer.py` is added to
-the top level of your source tree, next to `setup.py` and the `setup.cfg`
-that configures it. This overrides several distutils/setuptools commands to
-compute the version when invoked, and changes `setup.py build` and `setup.py
-sdist` to replace `_version.py` with a small static file that contains just
-the generated version data.
-
-## Installation
-
-See [INSTALL.md](./INSTALL.md) for detailed installation instructions.
-
-## Version-String Flavors
-
-Code which uses Versioneer can learn about its version string at runtime by
-importing `_version` from your main `__init__.py` file and running the
-`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can
-import the top-level `versioneer.py` and run `get_versions()`.
-
-Both functions return a dictionary with different flavors of version
-information:
-
-* `['version']`: A condensed version string, rendered using the selected
-  style. This is the most commonly used value for the project's version
-  string. The default "pep440" style yields strings like `0.11`,
-  `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section
-  below for alternative styles.
-
-* `['full-revisionid']`: detailed revision identifier. For Git, this is the
-  full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac".
-
-* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the
-  commit date in ISO 8601 format. This will be None if the date is not
-  available.
-
-* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that
-  this is only accurate if run in a VCS checkout, otherwise it is likely to
-  be False or None.
-
-* `['error']`: if the version string could not be computed, this will be set
-  to a string describing the problem, otherwise it will be None. It may be
-  useful to throw an exception in setup.py if this is set, to avoid e.g.
-  creating tarballs with a version string of "unknown".
-
-Some variants are more useful than others. Including `full-revisionid` in a
-bug report should allow developers to reconstruct the exact code being tested
-(or indicate the presence of local changes that should be shared with the
-developers). `version` is suitable for display in an "about" box or a CLI
-`--version` output: it can be easily compared against release notes and lists
-of bugs fixed in various releases.
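
For example, a `setup.py` that would rather fail the build than ship an
"unknown" version can key off the `error` entry. A minimal sketch (the guard
and message are illustrative; only `get_versions()` itself comes from
Versioneer):

    import versioneer

    v = versioneer.get_versions()
    if v["error"]:
        # refuse to build artifacts whose version could not be computed
        raise RuntimeError("version lookup failed: %s" % v["error"])
    # then pass v["version"] to setup(version=...)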
-
-The installer adds the following text to your `__init__.py` to place a basic
-version in `YOURPROJECT.__version__`:
-
-    from cudf_kafka._version import get_versions
-    __version__ = get_versions()['version']
-    del get_versions
-
-## Styles
-
-The setup.cfg `style=` configuration controls how the VCS information is
-rendered into a version string.
-
-The default style, "pep440", produces a PEP440-compliant string, equal to the
-un-prefixed tag name for actual releases, and containing an additional "local
-version" section with more detail for in-between builds. For Git, this is
-TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags
---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
-tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
-that this commit is two revisions ("+2") beyond the "0.11" tag. For released
-software (exactly equal to a known tag), the identifier will only contain the
-stripped tag, e.g. "0.11".
-
-Other styles are available. See [details.md](details.md) in the Versioneer
-source tree for descriptions.
-
-## Debugging
-
-Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
-to return a version of "0+unknown". To investigate the problem, run `setup.py
-version`, which will run the version-lookup code in a verbose mode, and will
-display the full contents of `get_versions()` (including the `error` string,
-which may help identify what went wrong).
-
-## Known Limitations
-
-Some situations are known to cause problems for Versioneer. This details the
-most significant ones. More can be found on Github
-[issues page](https://github.com/warner/python-versioneer/issues).
-
-### Subprojects
-
-Versioneer has limited support for source trees in which `setup.py` is not in
-the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are
-two common reasons why `setup.py` might not be in the root:
-
-* Source trees which contain multiple subprojects, such as
-  [Buildbot](https://github.com/buildbot/buildbot), which contains both
-  "master" and "slave" subprojects, each with their own `setup.py`,
-  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
-  distributions (and upload multiple independently-installable tarballs).
-* Source trees whose main purpose is to contain a C library, but which also
-  provide bindings to Python (and perhaps other languages) in subdirectories.
-
-Versioneer will look for `.git` in parent directories, and most operations
-should get the right version string. However `pip` and `setuptools` have bugs
-and implementation details which frequently cause `pip install .` from a
-subproject directory to fail to find a correct version string (so it usually
-defaults to `0+unknown`).
-
-`pip install --editable .` should work correctly. `setup.py install` might
-work too.
-
-Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
-some later version.
-
-[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking
-this issue. The discussion in
-[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the
-issue from the Versioneer side in more detail.
-[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
-[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
-pip to let Versioneer work correctly.
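
A quick way to detect that a build has hit this failure mode is to check the
installed module's self-reported version for the fallback value. A sketch,
with `myproject` standing in for the real package name:

    import myproject

    # versions that fell back to the error path begin with "0+unknown"
    if myproject.__version__.startswith("0+unknown"):
        raise RuntimeError("Versioneer fell back to '0+unknown'; was this "
                           "installed from a subproject directory?")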
-
-Versioneer-0.16 and earlier only looked for a `.git` directory next to the
-`setup.cfg`, so subprojects were completely unsupported with those releases.
-
-### Editable installs with setuptools <= 18.5
-
-`setup.py develop` and `pip install --editable .` allow you to install a
-project into a virtualenv once, then continue editing the source code (and
-test) without re-installing after every change.
-
-"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
-convenient way to specify executable scripts that should be installed along
-with the python package.
-
-These both work as expected when using modern setuptools. When using
-setuptools-18.5 or earlier, however, certain operations will cause
-`pkg_resources.DistributionNotFound` errors when running the entrypoint
-script, which must be resolved by re-installing the package. This happens
-when the install happens with one version, then the egg_info data is
-regenerated while a different version is checked out. Many setup.py commands
-cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
-a different virtualenv), so this can be surprising.
-
-[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes
-this one, but upgrading to a newer version of setuptools should probably
-resolve it.
-
-### Unicode version strings
-
-While Versioneer works (and is continually tested) with both Python 2 and
-Python 3, it is not entirely consistent with bytes-vs-unicode distinctions.
-Newer releases probably generate unicode version strings on py2. It's not
-clear that this is wrong, but it may be surprising for applications when they
-write these strings to a network connection or include them in bytes-oriented
-APIs like cryptographic checksums.
-
-[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates
-this question.
-
-
-## Updating Versioneer
-
-To upgrade your project to a new release of Versioneer, do the following:
-
-* install the new Versioneer (`pip install -U versioneer` or equivalent)
-* edit `setup.cfg`, if necessary, to include any new configuration settings
-  indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
-* re-run `versioneer install` in your source tree, to replace
-  `SRC/_version.py`
-* commit any changed files
-
-## Future Directions
-
-This tool is designed to be easily extended to other version-control
-systems: all VCS-specific components are in separate directories like
-src/git/ . The top-level `versioneer.py` script is assembled from these
-components by running make-versioneer.py . In the future, make-versioneer.py
-will take a VCS name as an argument, and will construct a version of
-`versioneer.py` that is specific to the given VCS. It might also take the
-configuration arguments that are currently provided manually during
-installation by editing setup.py . Alternatively, it might go the other
-direction and include code from all supported VCS systems, reducing the
-number of intermediate scripts.
-
-
-## License
-
-To make Versioneer easier to embed, all its code is dedicated to the public
-domain. The `_version.py` that it creates is also in the public domain.
-Specifically, both are released under the Creative Commons "Public Domain
-Dedication" license (CC0-1.0), as described in
-https://creativecommons.org/publicdomain/zero/1.0/ .
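
To make the default "pep440" style described above concrete, this is roughly
what the `render_pep440()` helper in this file produces for a dirty checkout
two commits past a "0.11" tag. A sketch; the dict mirrors the pieces built by
`git_pieces_from_vcs()`:

    pieces = {
        "closest-tag": "0.11",  # nearest reachable tag, prefix stripped
        "distance": 2,          # commits since that tag
        "short": "1076c97",     # abbreviated commit id
        "dirty": True,          # uncommitted changes present
    }
    render_pep440(pieces)  # -> "0.11+2.g1076c97.dirty"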
-
-"""
-
-
-import errno
-import json
-import os
-import re
-import subprocess
-import sys
-
-try:
-    import configparser
-except ImportError:
-    import ConfigParser as configparser
-
-
-class VersioneerConfig:
-    """Container for Versioneer configuration parameters."""
-
-
-def get_root():
-    """Get the project root directory.
-
-    We require that all commands are run from the project root, i.e. the
-    directory that contains setup.py, setup.cfg, and versioneer.py .
-    """
-    root = os.path.realpath(os.path.abspath(os.getcwd()))
-    setup_py = os.path.join(root, "setup.py")
-    versioneer_py = os.path.join(root, "versioneer.py")
-    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
-        # allow 'python path/to/setup.py COMMAND'
-        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
-        setup_py = os.path.join(root, "setup.py")
-        versioneer_py = os.path.join(root, "versioneer.py")
-    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
-        err = (
-            "Versioneer was unable to find the project root directory. "
-            "Versioneer requires setup.py to be executed from "
-            "its immediate directory (like 'python setup.py COMMAND'), "
-            "or in a way that lets it use sys.argv[0] to find the root "
-            "(like 'python path/to/setup.py COMMAND')."
-        )
-        raise VersioneerBadRootError(err)
-    try:
-        # Certain runtime workflows (setup.py install/develop in a setuptools
-        # tree) execute all dependencies in a single python process, so
-        # "versioneer" may be imported multiple times, and python's shared
-        # module-import table will cache the first one. So we can't use
-        # os.path.dirname(__file__), as that will find whichever
-        # versioneer.py was first imported, even in later projects.
-        me = os.path.realpath(os.path.abspath(__file__))
-        me_dir = os.path.normcase(os.path.splitext(me)[0])
-        vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])
-        if me_dir != vsr_dir:
-            print(
-                "Warning: build in %s is using versioneer.py from %s"
-                % (os.path.dirname(me), versioneer_py)
-            )
-    except NameError:
-        pass
-    return root
-
-
-def get_config_from_root(root):
-    """Read the project setup.cfg file to determine Versioneer config."""
-    # This might raise EnvironmentError (if setup.cfg is missing), or
-    # configparser.NoSectionError (if it lacks a [versioneer] section), or
-    # configparser.NoOptionError (if it lacks "VCS="). See the docstring at
-    # the top of versioneer.py for instructions on writing your setup.cfg .
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.SafeConfigParser() - with open(setup_cfg) as f: - parser.readfp(f) - VCS = parser.get("versioneer", "VCS") # mandatory - - def get(parser, name): - if parser.has_option("versioneer", name): - return parser.get("versioneer", name) - return None - - cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = get(parser, "style") or "" - cfg.versionfile_source = get(parser, "versionfile_source") - cfg.versionfile_build = get(parser, "versionfile_build") - cfg.tag_prefix = get(parser, "tag_prefix") - if cfg.tag_prefix in ("''", '""'): - cfg.tag_prefix = "" - cfg.parentdir_prefix = get(parser, "parentdir_prefix") - cfg.verbose = get(parser, "verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command( - commands, args, cwd=None, verbose=False, hide_stderr=False, env=None -): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print(f"unable to find command, tried {commands}") - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -LONG_VERSION_PY[ - "git" -] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%%s*" %% tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], - cwd=root)[0].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%%d" %% pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs) - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r"\d", r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command( - GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True - ) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ( - "unable to parse git-describe output: '%s'" % describe_out - ) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( - full_tag, - tag_prefix, - ) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command( - GITS, ["rev-list", "HEAD", "--count"], cwd=root - ) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - f = open(".gitattributes") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except OSError: - pass - if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.18) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except OSError: - raise NotThisMethod("unable to read _version.py") - mo = re.search( - r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, - re.M | re.S, - ) - if not mo: - mo = re.search( - r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, - re.M | re.S, - ) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps( - versions, sort_keys=True, indent=1, separators=(",", ": ") - ) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set {} to '{}'".format(filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. - """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert ( - cfg.versionfile_source is not None - ), "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. 
- - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print(f"got version from file {versionfile_abs} {ver}") - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - print("unable to compute version") - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(): - """Get the custom setuptools/distutils subclasses used by Versioneer.""" - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. - # Also see https://github.com/warner/python-versioneer/issues/52 - - cmds = {} - - # we add "version" to both distutils and setuptools - from distutils.core import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - - cmds["version"] = cmd_version - - # we override "build_py" in both distutils and setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? 
- # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? - - # we override different "build_py" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py - else: - from distutils.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join( - self.build_lib, cfg.versionfile_build - ) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - cmds["build_py"] = cmd_build_py - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if "py2exe" in sys.modules: # py2exe enabled? 
- try: - from py2exe.distutils_buildexe import py2exe as _py2exe # py3 - except ImportError: - from py2exe.build_exe import py2exe as _py2exe # py2 - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["py2exe"] = cmd_py2exe - - # we override different "sdist" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist - else: - from distutils.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file( - target_versionfile, self._versioneer_generated_versions - ) - - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. 
- -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -INIT_PY_SNIPPET = """ -from cudf_kafka._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - - -def do_setup(): - """Main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except ( - OSError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: - if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print( - "Adding sample versioneer config to setup.cfg", file=sys.stderr - ) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy) as f: - old = f.read() - except OSError: - old = "" - if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in) as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except OSError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print( - " appending versionfile_source ('%s') to MANIFEST.in" - % cfg.versionfile_source - ) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. 
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) diff --git a/python/custreamz/custreamz/_version.py b/python/custreamz/custreamz/_version.py deleted file mode 100644 index a017486df32..00000000000 --- a/python/custreamz/custreamz/_version.py +++ /dev/null @@ -1,566 +0,0 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "v" - cfg.parentdir_prefix = "custreamz-" - cfg.versionfile_source = "custreamz/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command( - commands, args, cwd=None, verbose=False, hide_stderr=False, env=None -): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print(f"unable to find command, tried {commands}") - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs) - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r"\d", r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command( - GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True - ) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ( - "unable to parse git-describe output: '%s'" % describe_out - ) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces[ - "error" - ] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command( - GITS, ["rev-list", "HEAD", "--count"], cwd=root - ) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords( - get_keywords(), cfg.tag_prefix, verbose - ) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split("/"): - root = os.path.dirname(root) - except NameError: - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None, - } - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } diff --git a/python/custreamz/setup.cfg b/python/custreamz/setup.cfg index 2ce4eaa82f0..8c038db9349 100644 --- a/python/custreamz/setup.cfg +++ b/python/custreamz/setup.cfg @@ -1,12 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. - -[versioneer] -VCS = git -style = pep440 -versionfile_source = custreamz/_version.py -versionfile_build = custreamz/_version.py -tag_prefix = v -parentdir_prefix = custreamz- +# Copyright (c) 2020-2023, NVIDIA CORPORATION. [isort] line_length=79 diff --git a/python/custreamz/setup.py b/python/custreamz/setup.py index 2fe12a54855..65a7aac6395 100644 --- a/python/custreamz/setup.py +++ b/python/custreamz/setup.py @@ -1,6 +1,5 @@ # Copyright (c) 2020-2023, NVIDIA CORPORATION. 
-import versioneer from setuptools import find_packages, setup install_requires = ["cudf_kafka", "cudf"] @@ -9,7 +8,7 @@ setup( name="custreamz", - version=versioneer.get_version(), + version="23.04.00", description="cuStreamz - GPU Accelerated Streaming", url="https://github.com/rapidsai/cudf", author="NVIDIA Corporation", @@ -26,7 +25,6 @@ "Programming Language :: Python :: 3.10", ], packages=find_packages(include=["custreamz", "custreamz.*"]), - cmdclass=versioneer.get_cmdclass(), install_requires=install_requires, extras_require=extras_require, zip_safe=False, diff --git a/python/custreamz/versioneer.py b/python/custreamz/versioneer.py deleted file mode 100644 index 9c9ddae7340..00000000000 --- a/python/custreamz/versioneer.py +++ /dev/null @@ -1,1904 +0,0 @@ -# Version: 0.18 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! -* https://github.com/warner/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -* `pip install versioneer` to somewhere to your $PATH -* add a `[versioneer]` section to your setup.cfg (see below) -* run `versioneer install` in your source tree, commit the results - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). 
Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. - -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. 
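
To make these flavors concrete, here is a minimal sketch (assuming a
generated `custreamz/_version.py` is importable) of driving a CLI
`--version` report from the returned dictionary, including the suggested
guard on `error`:

    from custreamz._version import get_versions

    info = get_versions()
    if info["error"] is not None:
        # e.g. refuse to build a tarball stamped "unknown"
        raise SystemExit("version lookup failed: %s" % info["error"])
    print("custreamz %s" % info["version"])
    print("  full-revisionid: %s" % info["full-revisionid"])
    print("  date: %s" % info["date"])
    if info["dirty"]:
        print("  (tree has uncommitted changes)")
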
- -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from custreamz._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/warner/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other langauges) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. 
- -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. - -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. - -[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - -### Unicode version strings - -While Versioneer works (and is continually tested) with both Python 2 and -Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. -Newer releases probably generate unicode version strings on py2. It's not -clear that this is wrong, but it may be surprising for applications when then -write these strings to a network connection or include them in bytes-oriented -APIs like cryptographic checksums. - -[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates -this question. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . 
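
Before the implementation below, a condensed sketch of the default pep440
rule described under "Styles" above; `pieces` is the dictionary produced by
parsing `git describe` output, and only the tagged case is shown (the full
`render_pep440` later in this file also handles the untagged
`0+untagged...` fallback):

    def render_pep440_sketch(pieces):
        # TAG[+DISTANCE.gHEX[.dirty]] -- the default "pep440" style
        rendered = pieces["closest-tag"]  # e.g. "0.11"
        if pieces["distance"] or pieces["dirty"]:
            # use "." instead of "+" if the tag already has a local part
            rendered += "." if "+" in rendered else "+"
            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
        return rendered

    render_pep440_sketch({"closest-tag": "0.11", "distance": 2,
                          "short": "1076c97", "dirty": True})
    # -> '0.11+2.g1076c97.dirty'
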
- -""" - -from __future__ import print_function - -import errno -import json -import os -import re -import subprocess -import sys - -try: - import configparser -except ImportError: - import ConfigParser as configparser - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . - """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ( - "Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND')." - ) - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. - me = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(me)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: - print( - "Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(me), versioneer_py) - ) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise EnvironmentError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . 
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: - parser.readfp(f) - VCS = parser.get("versioneer", "VCS") # mandatory - - def get(parser, name): - if parser.has_option("versioneer", name): - return parser.get("versioneer", name) - return None - - cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = get(parser, "style") or "" - cfg.versionfile_source = get(parser, "versionfile_source") - cfg.versionfile_build = get(parser, "versionfile_build") - cfg.tag_prefix = get(parser, "tag_prefix") - if cfg.tag_prefix in ("''", '""'): - cfg.tag_prefix = "" - cfg.parentdir_prefix = get(parser, "parentdir_prefix") - cfg.verbose = get(parser, "verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command( - commands, args, cwd=None, verbose=False, hide_stderr=False, env=None -): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -LONG_VERSION_PY[ - "git" -] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%%s*" %% tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], - cwd=root)[0].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%%d" %% pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command( - GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True - ) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ( - "unable to parse git-describe output: '%s'" % describe_out - ) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, - ) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command( - GITS, ["rev-list", "HEAD", "--count"], cwd=root - ) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: - pass - if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.18) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except EnvironmentError: - raise NotThisMethod("unable to read _version.py") - mo = re.search( - r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, - re.M | re.S, - ) - if not mo: - mo = re.search( - r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, - re.M | re.S, - ) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps( - versions, sort_keys=True, indent=1, separators=(",", ": ") - ) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. - """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert ( - cfg.versionfile_source is not None - ), "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. 
- - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - print("unable to compute version") - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(): - """Get the custom setuptools/distutils subclasses used by Versioneer.""" - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. - # Also see https://github.com/warner/python-versioneer/issues/52 - - cmds = {} - - # we add "version" to both distutils and setuptools - from distutils.core import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - - cmds["version"] = cmd_version - - # we override "build_py" in both distutils and setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? 
- # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? - - # we override different "build_py" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py - else: - from distutils.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join( - self.build_lib, cfg.versionfile_build - ) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - cmds["build_py"] = cmd_build_py - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if "py2exe" in sys.modules: # py2exe enabled? 
- try: - from py2exe.distutils_buildexe import py2exe as _py2exe # py3 - except ImportError: - from py2exe.build_exe import py2exe as _py2exe # py2 - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["py2exe"] = cmd_py2exe - - # we override different "sdist" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist - else: - from distutils.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file( - target_versionfile, self._versioneer_generated_versions - ) - - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. 
- -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -INIT_PY_SNIPPET = """ -from custreamz._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - - -def do_setup(): - """Main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: - if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print( - "Adding sample versioneer config to setup.cfg", file=sys.stderr - ) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except EnvironmentError: - old = "" - if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print( - " appending versionfile_source ('%s') to MANIFEST.in" - % cfg.versionfile_source - ) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. 
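# Concretely: with versionfile_source = custreamz/_version.py (the value
# in the setup.cfg removed earlier in this patch), the call below ends up
# appending this line to .gitattributes so that `git archive` applies
# export-subst keyword substitution to _version.py:
#
#     custreamz/_version.py export-subst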
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) diff --git a/python/dask_cudf/dask_cudf/__init__.py b/python/dask_cudf/dask_cudf/__init__.py index 5e3a9342c25..010e4a104b2 100644 --- a/python/dask_cudf/dask_cudf/__init__.py +++ b/python/dask_cudf/dask_cudf/__init__.py @@ -1,9 +1,8 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. from dask.dataframe import from_delayed import cudf -from cudf._version import get_versions from . import backends from .core import DataFrame, Series, concat, from_cudf, from_dask_dataframe @@ -15,8 +14,7 @@ except ImportError: pass -__version__ = get_versions()["version"] -del get_versions +__version__ = "23.04.00" __all__ = [ "DataFrame", diff --git a/python/dask_cudf/dask_cudf/_version.py b/python/dask_cudf/dask_cudf/_version.py deleted file mode 100644 index f0dbcac0017..00000000000 --- a/python/dask_cudf/dask_cudf/_version.py +++ /dev/null @@ -1,566 +0,0 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "" - cfg.parentdir_prefix = "dask_cudf-" - cfg.versionfile_source = "dask_cudf/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command( - commands, args, cwd=None, verbose=False, hide_stderr=False, env=None -): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print(f"unable to find command, tried {commands}") - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs) - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r"\d", r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command( - GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True - ) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ( - "unable to parse git-describe output: '%s'" % describe_out - ) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces[ - "error" - ] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command( - GITS, ["rev-list", "HEAD", "--count"], cwd=root - ) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords( - get_keywords(), cfg.tag_prefix, verbose - ) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split("/"): - root = os.path.dirname(root) - except NameError: - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None, - } - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } diff --git a/python/dask_cudf/setup.cfg b/python/dask_cudf/setup.cfg index f45bdf00430..66f4b8891d0 100644 --- a/python/dask_cudf/setup.cfg +++ b/python/dask_cudf/setup.cfg @@ -1,12 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. - -[versioneer] -VCS = git -style = pep440 -versionfile_source = dask_cudf/_version.py -versionfile_build = dask_cudf/_version.py -tag_prefix = -parentdir_prefix = dask_cudf- +# Copyright (c) 2020-2023, NVIDIA CORPORATION. [isort] line_length=79 diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 04145d23978..8611f2379f7 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -1,19 +1,14 @@ # Copyright (c) 2019-2023, NVIDIA CORPORATION. 
-import os - -import versioneer from setuptools import find_packages, setup -cuda_suffix = os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default="") - install_requires = [ "dask>=2023.1.1", "distributed>=2023.1.1", "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.6.0dev0", - f"cudf{cuda_suffix}==23.4.*", + "cudf==23.4.*", "cupy-cuda11x", ] @@ -27,21 +22,9 @@ ] } -if "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE" in os.environ: - orig_get_versions = versioneer.get_versions - - version_override = os.environ["RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE"] - - def get_versions(): - data = orig_get_versions() - data["version"] = version_override - return data - - versioneer.get_versions = get_versions - setup( - name=f"dask-cudf{cuda_suffix}", - version=versioneer.get_version(), + name="dask-cudf", + version="23.04.00", description="Utilities for Dask and cuDF interactions", url="https://github.com/rapidsai/cudf", author="NVIDIA Corporation", @@ -57,7 +40,6 @@ def get_versions(): "Programming Language :: Python :: 3.10", ], packages=find_packages(exclude=["tests", "tests.*"]), - cmdclass=versioneer.get_cmdclass(), install_requires=install_requires, extras_require=extras_require, ) diff --git a/python/dask_cudf/versioneer.py b/python/dask_cudf/versioneer.py deleted file mode 100644 index a560f2e8797..00000000000 --- a/python/dask_cudf/versioneer.py +++ /dev/null @@ -1,1904 +0,0 @@ -# Version: 0.18 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! -* https://github.com/warner/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -* `pip install versioneer` to somewhere to your $PATH -* add a `[versioneer]` section to your setup.cfg (see below) -* run `versioneer install` in your source tree, commit the results - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. 
"myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. - -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. 
Including `full-revisionid` in a
-bug report should allow developers to reconstruct the exact code being tested
-(or indicate the presence of local changes that should be shared with the
-developers). `version` is suitable for display in an "about" box or a CLI
-`--version` output: it can be easily compared against release notes and lists
-of bugs fixed in various releases.
-
-The installer adds the following text to your `__init__.py` to place a basic
-version in `YOURPROJECT.__version__`:
-
-    from ._version import get_versions
-    __version__ = get_versions()['version']
-    del get_versions
-
-## Styles
-
-The setup.cfg `style=` configuration controls how the VCS information is
-rendered into a version string.
-
-The default style, "pep440", produces a PEP440-compliant string, equal to the
-un-prefixed tag name for actual releases, and containing an additional "local
-version" section with more detail for in-between builds. For Git, this is
-TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags
---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
-tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
-that this commit is two revisions ("+2") beyond the "0.11" tag. For released
-software (exactly equal to a known tag), the identifier will only contain the
-stripped tag, e.g. "0.11".
-
-Other styles are available. See [details.md](details.md) in the Versioneer
-source tree for descriptions.
-
-## Debugging
-
-Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
-to return a version of "0+unknown". To investigate the problem, run `setup.py
-version`, which will run the version-lookup code in a verbose mode, and will
-display the full contents of `get_versions()` (including the `error` string,
-which may help identify what went wrong).
-
-## Known Limitations
-
-Some situations are known to cause problems for Versioneer. This details the
-most significant ones. More can be found on Github
-[issues page](https://github.com/warner/python-versioneer/issues).
-
-### Subprojects
-
-Versioneer has limited support for source trees in which `setup.py` is not in
-the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are
-two common reasons why `setup.py` might not be in the root:
-
-* Source trees which contain multiple subprojects, such as
-  [Buildbot](https://github.com/buildbot/buildbot), which contains both
-  "master" and "slave" subprojects, each with their own `setup.py`,
-  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
-  distributions (and upload multiple independently-installable tarballs).
-* Source trees whose main purpose is to contain a C library, but which also
-  provide bindings to Python (and perhaps other languages) in subdirectories.
-
-Versioneer will look for `.git` in parent directories, and most operations
-should get the right version string. However `pip` and `setuptools` have bugs
-and implementation details which frequently cause `pip install .` from a
-subproject directory to fail to find a correct version string (so it usually
-defaults to `0+unknown`).
-
-`pip install --editable .` should work correctly. `setup.py install` might
-work too.
-
-Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
-some later version.
-
-[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking
-this issue.
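As a worked example of the default "pep440" style described under "Styles" above, here is a self-contained sketch of the TAG[+DISTANCE.gHEX[.dirty]] rule; it mirrors, but does not reproduce verbatim, the `render_pep440()` logic deleted elsewhere in this patch, and the `pieces` dictionary shape follows `git_pieces_from_vcs()`:

    # Sketch of the default pep440 rendering rule: TAG[+DISTANCE.gHEX[.dirty]],
    # falling back to "0+untagged.DISTANCE.gHEX[.dirty]" when no tag is found.
    def render_pep440_sketch(pieces):
        if pieces["closest-tag"]:
            rendered = pieces["closest-tag"]
            if pieces["distance"] or pieces["dirty"]:
                # use "." if the tag already contains a "+", otherwise start
                # the PEP 440 "local version" segment with "+"
                rendered += "." if "+" in pieces["closest-tag"] else "+"
                rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
                if pieces["dirty"]:
                    rendered += ".dirty"
        else:
            rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
        return rendered

    # Matches the example above: two commits past the "0.11" tag, dirty tree.
    assert render_pep440_sketch(
        {"closest-tag": "0.11", "distance": 2, "short": "1076c97", "dirty": True}
    ) == "0.11+2.g1076c97.dirty"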
The discussion in
-[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the
-issue from the Versioneer side in more detail.
-[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
-[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
-pip to let Versioneer work correctly.
-
-Versioneer-0.16 and earlier only looked for a `.git` directory next to the
-`setup.cfg`, so subprojects were completely unsupported with those releases.
-
-### Editable installs with setuptools <= 18.5
-
-`setup.py develop` and `pip install --editable .` allow you to install a
-project into a virtualenv once, then continue editing the source code (and
-test) without re-installing after every change.
-
-"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
-convenient way to specify executable scripts that should be installed along
-with the python package.
-
-These both work as expected when using modern setuptools. When using
-setuptools-18.5 or earlier, however, certain operations will cause
-`pkg_resources.DistributionNotFound` errors when running the entrypoint
-script, which must be resolved by re-installing the package. This happens
-when the install happens with one version, then the egg_info data is
-regenerated while a different version is checked out. Many setup.py commands
-cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
-a different virtualenv), so this can be surprising.
-
-[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes
-this one, but upgrading to a newer version of setuptools should probably
-resolve it.
-
-### Unicode version strings
-
-While Versioneer works (and is continually tested) with both Python 2 and
-Python 3, it is not entirely consistent with bytes-vs-unicode distinctions.
-Newer releases probably generate unicode version strings on py2. It's not
-clear that this is wrong, but it may be surprising for applications which then
-write these strings to a network connection or include them in bytes-oriented
-APIs like cryptographic checksums.
-
-[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates
-this question.
-
-
-## Updating Versioneer
-
-To upgrade your project to a new release of Versioneer, do the following:
-
-* install the new Versioneer (`pip install -U versioneer` or equivalent)
-* edit `setup.cfg`, if necessary, to include any new configuration settings
-  indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
-* re-run `versioneer install` in your source tree, to replace
-  `SRC/_version.py`
-* commit any changed files
-
-## Future Directions
-
-This tool is designed to be easily extended to other version-control
-systems: all VCS-specific components are in separate directories like
-src/git/ . The top-level `versioneer.py` script is assembled from these
-components by running make-versioneer.py . In the future, make-versioneer.py
-will take a VCS name as an argument, and will construct a version of
-`versioneer.py` that is specific to the given VCS. It might also take the
-configuration arguments that are currently provided manually during
-installation by editing setup.py . Alternatively, it might go the other
-direction and include code from all supported VCS systems, reducing the
-number of intermediate scripts.
-
-
-## License
-
-To make Versioneer easier to embed, all its code is dedicated to the public
-domain.
The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . - -""" - -from __future__ import print_function - -import errno -import json -import os -import re -import subprocess -import sys - -try: - import configparser -except ImportError: - import ConfigParser as configparser - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . - """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ( - "Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND')." - ) - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. - me = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(me)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: - print( - "Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(me), versioneer_py) - ) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise EnvironmentError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . 
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: - parser.readfp(f) - VCS = parser.get("versioneer", "VCS") # mandatory - - def get(parser, name): - if parser.has_option("versioneer", name): - return parser.get("versioneer", name) - return None - - cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = get(parser, "style") or "" - cfg.versionfile_source = get(parser, "versionfile_source") - cfg.versionfile_build = get(parser, "versionfile_build") - cfg.tag_prefix = get(parser, "tag_prefix") - if cfg.tag_prefix in ("''", '""'): - cfg.tag_prefix = "" - cfg.parentdir_prefix = get(parser, "parentdir_prefix") - cfg.verbose = get(parser, "verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command( - commands, args, cwd=None, verbose=False, hide_stderr=False, env=None -): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -LONG_VERSION_PY[ - "git" -] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%%s*" %% tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], - cwd=root)[0].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%%d" %% pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command( - GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True - ) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ( - "unable to parse git-describe output: '%s'" % describe_out - ) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, - ) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command( - GITS, ["rev-list", "HEAD", "--count"], cwd=root - ) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: - pass - if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.18) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except EnvironmentError: - raise NotThisMethod("unable to read _version.py") - mo = re.search( - r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, - re.M | re.S, - ) - if not mo: - mo = re.search( - r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, - re.M | re.S, - ) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps( - versions, sort_keys=True, indent=1, separators=(",", ": ") - ) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. - """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert ( - cfg.versionfile_source is not None - ), "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. 
- - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - print("unable to compute version") - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(): - """Get the custom setuptools/distutils subclasses used by Versioneer.""" - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. - # Also see https://github.com/warner/python-versioneer/issues/52 - - cmds = {} - - # we add "version" to both distutils and setuptools - from distutils.core import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - - cmds["version"] = cmd_version - - # we override "build_py" in both distutils and setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? 
- # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? - - # we override different "build_py" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py - else: - from distutils.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join( - self.build_lib, cfg.versionfile_build - ) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - cmds["build_py"] = cmd_build_py - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if "py2exe" in sys.modules: # py2exe enabled? 
- try: - from py2exe.distutils_buildexe import py2exe as _py2exe # py3 - except ImportError: - from py2exe.build_exe import py2exe as _py2exe # py2 - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["py2exe"] = cmd_py2exe - - # we override different "sdist" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist - else: - from distutils.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file( - target_versionfile, self._versioneer_generated_versions - ) - - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. 
- -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -INIT_PY_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - - -def do_setup(): - """Main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: - if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print( - "Adding sample versioneer config to setup.cfg", file=sys.stderr - ) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except EnvironmentError: - old = "" - if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print( - " appending versionfile_source ('%s') to MANIFEST.in" - % cfg.versionfile_source - ) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. 
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) diff --git a/setup.cfg b/setup.cfg index 3f8fa7e8406..962b7d73bbe 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2022, NVIDIA CORPORATION. +# Copyright (c) 2017-2023, NVIDIA CORPORATION. [flake8] filename = *.py, *.pyx, *.pxd, *.pxi @@ -44,8 +44,7 @@ ignore_missing_imports = True # they are imported by a checked file. follow_imports = skip exclude = (?x)( - (cudf|custreamz|cudf_kafka|dask_cudf)/_version\.py - | cudf/_lib/ + cudf/_lib/ | cudf/cudf/benchmarks/ | cudf/cudf/tests/ | cudf/cudf/utils/metadata/orc_column_statistics_pb2.py @@ -57,7 +56,7 @@ exclude = (?x)( [codespell] # note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - # this is only to allow you to run codespell interactively -skip = ./.git,./.github,./cpp/build,.*egg-info.*,versioneer.py,./.mypy_cache,./cpp/tests,./python/cudf/cudf/tests,./java/src/test,./cpp/include/cudf_test/cxxopts.hpp +skip = ./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,./cpp/tests,./python/cudf/cudf/tests,./java/src/test,./cpp/include/cudf_test/cxxopts.hpp # ignore short words, and typename parameters like OffsetT ignore-regex = \b(.{1,4}|[A-Z]\w*T)\b ignore-words-list = inout,unparseable From f076905f53f3f2d44ada244c0754afc459b306a1 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 22 Feb 2023 20:52:02 -0500 Subject: [PATCH 04/60] Add segmented reduction support for fixed-point types (#12680) Depends on #12573 Adds additional support for fixed-point types in `cudf::segmented_reduce` for simple aggregations: sum, product, and sum-of-squares. 
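As a rough illustration of what this enables, here is a minimal sketch of a segmented sum over a decimal32 column, modeled on the gtests further down in this patch. The input values, segment offsets, and the wrapper function are invented for the example and are not part of the change itself:

    #include <cudf/aggregation.hpp>
    #include <cudf/reduction.hpp>
    #include <cudf/detail/utilities/vector_factories.hpp>
    #include <cudf/utilities/default_stream.hpp>
    #include <cudf_test/column_wrapper.hpp>

    #include <vector>

    void segmented_decimal_sum_example()
    {
      // Two segments of decimal32 values at scale -2: [1.00, 2.00] and [3.00, 4.00]
      auto const input = cudf::test::fixed_point_column_wrapper<int32_t>(
        {100, 200, 300, 400}, numeric::scale_type{-2});
      auto const offsets = std::vector<cudf::size_type>{0, 2, 4};
      auto const d_offsets =
        cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
      auto const agg = cudf::make_sum_aggregation<cudf::segmented_reduce_aggregation>();

      // Sum keeps the input scale, so the output type equals the input type here.
      auto const result = cudf::segmented_reduce(
        input, d_offsets, *agg, cudf::column_view(input).type(), cudf::null_policy::EXCLUDE);
      // result now holds rep values {300, 700}, i.e. [3.00, 7.00] at scale -2.
    }

A product aggregation over the same input would instead widen the output scale: as the implementation below notes, the output scale becomes the input scale multiplied by the largest segment length (and by 2 for sum-of-squares).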
Reference: #10432 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Mike Wilson (https://github.com/hyperbolic2346) - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12680 --- cpp/CMakeLists.txt | 1 + cpp/src/reductions/segmented/compound.cuh | 29 +- cpp/src/reductions/segmented/counts.cu | 54 ++++ cpp/src/reductions/segmented/counts.hpp | 55 ++++ cpp/src/reductions/segmented/simple.cuh | 91 ++++-- .../reductions/segmented_reduction_tests.cpp | 305 +++++++++--------- 6 files changed, 341 insertions(+), 194 deletions(-) create mode 100644 cpp/src/reductions/segmented/counts.cu create mode 100644 cpp/src/reductions/segmented/counts.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d402a47628c..96524b7c55f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -455,6 +455,7 @@ add_library( src/reductions/scan/scan_inclusive.cu src/reductions/segmented/all.cu src/reductions/segmented/any.cu + src/reductions/segmented/counts.cu src/reductions/segmented/max.cu src/reductions/segmented/mean.cu src/reductions/segmented/min.cu diff --git a/cpp/src/reductions/segmented/compound.cuh b/cpp/src/reductions/segmented/compound.cuh index dc8a995d1b0..e8abd32cf61 100644 --- a/cpp/src/reductions/segmented/compound.cuh +++ b/cpp/src/reductions/segmented/compound.cuh @@ -16,6 +16,7 @@ #pragma once +#include "counts.hpp" #include "update_validity.hpp" #include @@ -63,34 +64,26 @@ std::unique_ptr compound_segmented_reduction(column_view const& col, data_type{type_to_id()}, num_segments, mask_state::UNALLOCATED, stream, mr); auto out_itr = result->mutable_view().template begin(); - // Compute valid counts - rmm::device_uvector valid_counts(num_segments, stream); - if (col.has_nulls() && (null_handling == null_policy::EXCLUDE)) { - auto valid_fn = [] __device__(auto p) -> size_type { return static_cast(p.second); }; - auto itr = thrust::make_transform_iterator(d_col->pair_begin(), valid_fn); - cudf::reduction::detail::segmented_reduce(itr, - offsets.begin(), - offsets.end(), - valid_counts.data(), - thrust::plus{}, - 0, - stream); - } else { - thrust::adjacent_difference( - rmm::exec_policy(stream), offsets.begin() + 1, offsets.end(), valid_counts.begin()); - } + // Compute counts + rmm::device_uvector counts = + cudf::reduction::detail::segmented_counts(col.null_mask(), + col.has_nulls(), + offsets, + null_handling, + stream, + rmm::mr::get_current_device_resource()); // Run segmented reduction if (col.has_nulls()) { auto nrt = compound_op.template get_null_replacing_element_transformer(); auto itr = thrust::make_transform_iterator(d_col->pair_begin(), nrt); cudf::reduction::detail::segmented_reduce( - itr, offsets.begin(), offsets.end(), out_itr, compound_op, ddof, valid_counts.data(), stream); + itr, offsets.begin(), offsets.end(), out_itr, compound_op, ddof, counts.data(), stream); } else { auto et = compound_op.template get_element_transformer(); auto itr = thrust::make_transform_iterator(d_col->begin(), et); cudf::reduction::detail::segmented_reduce( - itr, offsets.begin(), offsets.end(), out_itr, compound_op, ddof, valid_counts.data(), stream); + itr, offsets.begin(), offsets.end(), out_itr, compound_op, ddof, counts.data(), stream); } // Compute the output null mask diff --git a/cpp/src/reductions/segmented/counts.cu b/cpp/src/reductions/segmented/counts.cu new file mode 100644 index 00000000000..b9064ad3ffe --- /dev/null +++ 
b/cpp/src/reductions/segmented/counts.cu @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "counts.hpp" + +#include + +#include + +namespace cudf { +namespace reduction { +namespace detail { + +rmm::device_uvector segmented_counts(bitmask_type const* null_mask, + bool has_nulls, + device_span offsets, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const num_segments = offsets.size() - 1; + + if (has_nulls && (null_handling == null_policy::EXCLUDE)) { + return cudf::detail::segmented_count_bits(null_mask, + offsets.begin(), + offsets.end() - 1, + offsets.begin() + 1, + cudf::detail::count_bits_policy::SET_BITS, + stream, + mr); + } + + rmm::device_uvector valid_counts(num_segments, stream, mr); + thrust::adjacent_difference( + rmm::exec_policy(stream), offsets.begin() + 1, offsets.end(), valid_counts.begin()); + return valid_counts; +} + +} // namespace detail +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented/counts.hpp b/cpp/src/reductions/segmented/counts.hpp new file mode 100644 index 00000000000..c5ee1fadae7 --- /dev/null +++ b/cpp/src/reductions/segmented/counts.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +namespace cudf { +class column_device_view; + +namespace reduction { +namespace detail { + +/** + * @brief Compute the number of elements per segment + * + * If `null_handling == null_policy::EXCLUDE`, the count for each + * segment omits any null entries. Otherwise, this returns the number + * of elements in each segment. 
+ * + * @param null_mask Null values over which the segment offsets apply + * @param has_nulls True if d_col contains any nulls + * @param offsets Indices to segment boundaries + * @param null_handling How null entries are processed within each segment + * @param stream Used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return The number of elements in each segment + */ +rmm::device_uvector segmented_counts(bitmask_type const* null_mask, + bool has_nulls, + device_span offsets, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/segmented/simple.cuh b/cpp/src/reductions/segmented/simple.cuh index fb080ebf67c..0c22848fd89 100644 --- a/cpp/src/reductions/segmented/simple.cuh +++ b/cpp/src/reductions/segmented/simple.cuh @@ -16,6 +16,7 @@ #pragma once +#include "counts.hpp" #include "update_validity.hpp" #include @@ -36,6 +37,7 @@ #include #include #include +#include #include #include @@ -188,7 +190,7 @@ std::unique_ptr string_segmented_reduction(column_view const& col, } /** - * @brief Fixed point segmented reduction for 'min', 'max'. + * @brief Specialization for fixed-point segmented reduction * * @tparam InputType the input column data-type * @tparam Op the operator of cudf::reduction::op:: @@ -200,11 +202,7 @@ std::unique_ptr string_segmented_reduction(column_view const& col, * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column in device memory */ - -template || - std::is_same_v)> +template std::unique_ptr fixed_point_segmented_reduction( column_view const& col, device_span offsets, @@ -214,23 +212,55 @@ std::unique_ptr fixed_point_segmented_reduction( rmm::mr::device_memory_resource* mr) { using RepType = device_storage_type_t; - return simple_segmented_reduction( - col, offsets, null_handling, init, stream, mr); -} + auto result = + simple_segmented_reduction(col, offsets, null_handling, init, stream, mr); + auto const scale = [&] { + if constexpr (std::is_same_v) { + // The product aggregation requires updating the scale of the fixed-point output column. + // The output scale needs to be the maximum count of all segments multiplied by + // the input scale value. 
+ rmm::device_uvector const counts = + cudf::reduction::detail::segmented_counts(col.null_mask(), + col.has_nulls(), + offsets, + null_policy::EXCLUDE, // do not count nulls + stream, + rmm::mr::get_current_device_resource()); + + auto const max_count = thrust::reduce(rmm::exec_policy(stream), + counts.begin(), + counts.end(), + size_type{0}, + thrust::maximum{}); + + auto const new_scale = numeric::scale_type{col.type().scale() * max_count}; + + // adjust values in each segment to match the new scale + auto const d_col = column_device_view::create(col, stream); + thrust::transform(rmm::exec_policy(stream), + d_col->begin(), + d_col->end(), + d_col->begin(), + [new_scale] __device__(auto fp) { return fp.rescaled(new_scale); }); + return new_scale; + } -template () && - !std::is_same_v())> -std::unique_ptr fixed_point_segmented_reduction( - column_view const& col, - device_span offsets, - null_policy null_handling, - std::optional>, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FAIL("Segmented reduction on fixed point column only supports min and max reduction."); + if constexpr (std::is_same_v) { + return numeric::scale_type{col.type().scale() * 2}; + } + + return numeric::scale_type{col.type().scale()}; + }(); + + auto const size = result->size(); // get these before + auto const null_count = result->null_count(); // release() is called + auto contents = result->release(); + + return std::make_unique(data_type{type_to_id(), scale}, + size, + std::move(*(contents.data.release())), + std::move(*(contents.null_mask.release())), + null_count); } /** @@ -431,8 +461,23 @@ struct column_type_dispatcher { return reduce_numeric(col, offsets, output_type, null_handling, init, stream, mr); } + template ()>* = nullptr> + std::unique_ptr operator()(column_view const& col, + device_span offsets, + data_type const output_type, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + CUDF_EXPECTS(output_type == col.type(), "Output type must be same as input column type."); + return fixed_point_segmented_reduction( + col, offsets, null_handling, init, stream, mr); + } + template ()>* = nullptr> + std::enable_if_t() and + not cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const&, device_span, data_type const, diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index b4873a14509..74c5e7fb504 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -1094,223 +1094,222 @@ struct SegmentedReductionFixedPointTest : public cudf::test::BaseFixture { TYPED_TEST_SUITE(SegmentedReductionFixedPointTest, cudf::test::FixedPointTypes); -TYPED_TEST(SegmentedReductionFixedPointTest, MaxIncludeNulls) +TYPED_TEST(SegmentedReductionFixedPointTest, MaxWithNulls) { - // scale: -2, 0, 5 - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] - // values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX} - // offsets: {0, 3, 6, 7, 8, 10, 10} - // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0, 0} - // outputs: {3, XXX, 1, XXX, XXX, XXX} - // output nullmask: {1, 0, 1, 0, 0, 0} - using RepType = cudf::device_storage_type_t; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_max_aggregation(); + for (auto scale : {-2, 0, 5}) { auto const input = 
cudf::test::fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); - auto out_type = cudf::column_view(input).type(); - auto const expect = cudf::test::fixed_point_column_wrapper( + auto out_type = cudf::column_view(input).type(); + auto expect = cudf::test::fixed_point_column_wrapper( {3, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}, numeric::scale_type{scale}); + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); - auto res = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_max_aggregation(), - out_type, - cudf::null_policy::INCLUDE); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + expect = cudf::test::fixed_point_column_wrapper( + {3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); } } -TYPED_TEST(SegmentedReductionFixedPointTest, MaxExcludeNulls) +TYPED_TEST(SegmentedReductionFixedPointTest, MinWithNulls) { - // scale: -2, 0, 5 - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] - // values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX} - // offsets: {0, 3, 6, 7, 8, 10, 10} - // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0, 0} - // outputs: {3, 3, 1, XXX, XXX, XXX} - // output nullmask: {1, 1, 1, 0, 0, 0} - using RepType = cudf::device_storage_type_t; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_min_aggregation(); + for (auto scale : {-2, 0, 5}) { auto const input = cudf::test::fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto out_type = cudf::column_view(input).type(); + auto expect = cudf::test::fixed_point_column_wrapper( + {1, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}, numeric::scale_type{scale}); + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); + + expect = cudf::test::fixed_point_column_wrapper( + {1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); + } +} + +TYPED_TEST(SegmentedReductionFixedPointTest, MaxNonNullableInput) +{ + using RepType = cudf::device_storage_type_t; + + auto const offsets = std::vector{0, 3, 4, 4}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_max_aggregation(); + + for (auto scale : {-2, 0, 5}) { + auto const input = + cudf::test::fixed_point_column_wrapper({1, 2, 3, 1}, numeric::scale_type{scale}); auto out_type = cudf::column_view(input).type(); auto const expect = cudf::test::fixed_point_column_wrapper( - {3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); + {3, 1, XXX}, {1, 
1, 0}, numeric::scale_type{scale}); - auto res = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_max_aggregation(), - out_type, - cudf::null_policy::EXCLUDE); + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); } } -TYPED_TEST(SegmentedReductionFixedPointTest, MinIncludeNulls) +TYPED_TEST(SegmentedReductionFixedPointTest, MinNonNullableInput) { - // scale: -2, 0, 5 - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] - // values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX} - // offsets: {0, 3, 6, 7, 8, 10, 10} - // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0, 0} - // outputs: {1, XXX, 1, XXX, XXX, XXX} - // output nullmask: {1, 0, 1, 0, 0, 0} - using RepType = cudf::device_storage_type_t; + auto const offsets = std::vector{0, 3, 4, 4}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_min_aggregation(); + for (auto scale : {-2, 0, 5}) { auto const input = - cudf::test::fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, - {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, - numeric::scale_type{scale}); - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + cudf::test::fixed_point_column_wrapper({1, 2, 3, 1}, numeric::scale_type{scale}); auto out_type = cudf::column_view(input).type(); auto const expect = cudf::test::fixed_point_column_wrapper( - {1, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}, numeric::scale_type{scale}); + {1, 1, XXX}, {1, 1, 0}, numeric::scale_type{scale}); - auto res = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_min_aggregation(), - out_type, - cudf::null_policy::INCLUDE); + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); } } -TYPED_TEST(SegmentedReductionFixedPointTest, MinExcludeNulls) +TYPED_TEST(SegmentedReductionFixedPointTest, Sum) { - // scale: -2, 0, 5 - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] - // values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX} - // offsets: {0, 3, 6, 7, 8, 10, 10} - // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0, 0} - // outputs: {1, 1, 1, XXX, XXX, XXX} - // output nullmask: {1, 1, 1, 0, 0, 0} - using RepType = cudf::device_storage_type_t; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_sum_aggregation(); + for (auto scale : {-2, 0, 5}) { - auto const input = - cudf::test::fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, + auto input = + cudf::test::fixed_point_column_wrapper({-10, 0, 33, 100, XXX, 53, 11, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); - auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); - auto out_type = 
cudf::column_view(input).type(); - auto const expect = cudf::test::fixed_point_column_wrapper( - {1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); + auto const out_type = cudf::column_view(input).type(); + + auto expect = cudf::test::fixed_point_column_wrapper( + {23, XXX, 11, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}, numeric::scale_type{scale}); + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); - auto res = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_min_aggregation(), - out_type, - cudf::null_policy::EXCLUDE); + expect = cudf::test::fixed_point_column_wrapper( + {23, 153, 11, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + input = cudf::test::fixed_point_column_wrapper( + {-10, 0, 33, 100, 123, 53, 11, 0, -120, 88}, numeric::scale_type{scale}); + expect = cudf::test::fixed_point_column_wrapper( + {23, 276, 11, 0, -32, XXX}, {1, 1, 1, 1, 1, 0}, numeric::scale_type{scale}); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); } } -TYPED_TEST(SegmentedReductionFixedPointTest, MaxNonNullableInput) +TYPED_TEST(SegmentedReductionFixedPointTest, Product) { - // scale: -2, 0, 5 - // [1, 2, 3], [1], [] - // values: {1, 2, 3, 1} - // offsets: {0, 3, 4} - // outputs: {3, 1, XXX} - // output nullmask: {1, 1, 0} - using RepType = cudf::device_storage_type_t; + auto const offsets = std::vector{0, 3, 6, 7, 8, 12, 12}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_product_aggregation(); + for (auto scale : {-2, 0, 5}) { - auto const input = - cudf::test::fixed_point_column_wrapper({1, 2, 3, 1}, numeric::scale_type{scale}); - auto const offsets = std::vector{0, 3, 4, 4}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); - auto out_type = cudf::column_view(input).type(); - auto const expect = cudf::test::fixed_point_column_wrapper( - {3, 1, XXX}, {1, 1, 0}, numeric::scale_type{scale}); + auto input = cudf::test::fixed_point_column_wrapper( + {-10, 1, 33, 40, XXX, 50, 11000, XXX, XXX, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0}, + numeric::scale_type{scale}); + auto const out_type = cudf::column_view(input).type(); + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::INCLUDE); + auto expect = cudf::test::fixed_point_column_wrapper( + {-330, XXX, 11000, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}, numeric::scale_type{scale * 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); + + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::EXCLUDE); + expect = cudf::test::fixed_point_column_wrapper( + {-330, 2000, 11000, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale * 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); - auto include_null_res = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_max_aggregation(), - out_type, - cudf::null_policy::INCLUDE); - - auto exclude_null_res = 
- cudf::segmented_reduce(input, - d_offsets, - *cudf::make_max_aggregation(), - out_type, - cudf::null_policy::EXCLUDE); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*include_null_res, expect); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*exclude_null_res, expect); + input = cudf::test::fixed_point_column_wrapper( + {-10, 1, 33, 3, 40, 50, 11000, 0, -3, 50, 10, 4}, numeric::scale_type{scale}); + expect = cudf::test::fixed_point_column_wrapper( + {-330, 6000, 11000, 0, -6000, XXX}, {1, 1, 1, 1, 1, 0}, numeric::scale_type{scale * 4}); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); } } -TYPED_TEST(SegmentedReductionFixedPointTest, MinNonNullableInput) +TYPED_TEST(SegmentedReductionFixedPointTest, SumOfSquares) { - // scale: -2, 0, 5 - // [1, 2, 3], [1], [] - // values: {1, 2, 3, 1} - // offsets: {0, 3, 4} - // outputs: {1, 1, XXX} - // output nullmask: {1, 1, 0} - using RepType = cudf::device_storage_type_t; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = + cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); + auto const agg = cudf::make_sum_of_squares_aggregation(); + for (auto scale : {-2, 0, 5}) { - auto const input = - cudf::test::fixed_point_column_wrapper({1, 2, 3, 1}, numeric::scale_type{scale}); - auto const offsets = std::vector{0, 3, 4, 4}; - auto const d_offsets = - cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream()); - auto out_type = cudf::column_view(input).type(); - auto const expect = cudf::test::fixed_point_column_wrapper( - {1, 1, XXX}, {1, 1, 0}, numeric::scale_type{scale}); + auto input = + cudf::test::fixed_point_column_wrapper({-10, 0, 33, 100, XXX, 53, 11, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, + numeric::scale_type{scale}); + auto const out_type = cudf::column_view(input).type(); + + auto expect = cudf::test::fixed_point_column_wrapper( + {1189, XXX, 121, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}, numeric::scale_type{scale * 2}); + auto result = + cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); - auto include_null_res = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_min_aggregation(), - out_type, - cudf::null_policy::INCLUDE); - - auto exclude_null_res = - cudf::segmented_reduce(input, - d_offsets, - *cudf::make_min_aggregation(), - out_type, - cudf::null_policy::EXCLUDE); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*include_null_res, expect); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*exclude_null_res, expect); + expect = cudf::test::fixed_point_column_wrapper( + {1189, 12809, 121, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale * 2}); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); + + input = cudf::test::fixed_point_column_wrapper( + {-10, 0, 33, 100, 123, 53, 11, 0, -120, 88}, numeric::scale_type{scale}); + expect = cudf::test::fixed_point_column_wrapper( + {1189, 27938, 121, 0, 22144, XXX}, {1, 1, 1, 1, 1, 0}, numeric::scale_type{scale * 2}); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, cudf::null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); + result = cudf::segmented_reduce(input, d_offsets, *agg, out_type, 
cudf::null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); } } From fffdc0cdac913d402c91880919ee391877f47514 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 22 Feb 2023 20:52:42 -0500 Subject: [PATCH 05/60] Add compute-sanitizer github workflow action to nightly tests (#12800) Adds github workflow action to the nightly tests for running `compute-sanitizer` on the libcudf gtests. Reference: #12530 Authors: - David Wendt (https://github.com/davidwendt) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12800 --- .github/workflows/test.yaml | 12 +++++++ ci/test_cpp.sh | 30 +---------------- ci/test_cpp_common.sh | 32 +++++++++++++++++++ ci/test_cpp_memcheck.sh | 25 +++++++++++++++ .../all_cuda-118_arch-x86_64.yaml | 1 + dependencies.yaml | 12 +++++++ 6 files changed, 83 insertions(+), 29 deletions(-) create mode 100644 ci/test_cpp_common.sh create mode 100755 ci/test_cpp_memcheck.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index ff19d51f8ef..4d9e97a7b28 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -22,6 +22,18 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} + conda-cpp-memcheck-tests: + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + node_type: "gpu-latest-1" + arch: "amd64" + container_image: "rapidsai/ci:latest" + run_script: "ci/test_cpp_memcheck.sh" conda-python-cudf-tests: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 983a63d4ce9..bd7a82afbea 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -1,35 +1,7 @@ #!/bin/bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -set -euo pipefail - -. /opt/conda/etc/profile.d/conda.sh - -rapids-logger "Generate C++ testing dependencies" -rapids-dependency-file-generator \ - --output conda \ - --file_key test_cpp \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee env.yaml - -rapids-mamba-retry env create --force -f env.yaml -n test - -# Temporarily allow unbound variables for conda activation. -set +u -conda activate test -set -u - -CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) -RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/ -mkdir -p "${RAPIDS_TESTS_DIR}" - -rapids-print-env - -rapids-mamba-retry install \ - --channel "${CPP_CHANNEL}" \ - libcudf libcudf_kafka libcudf-tests - -rapids-logger "Check GPU usage" -nvidia-smi +source "$(dirname "$0")/test_cpp_common.sh" EXITCODE=0 trap "EXITCODE=1" ERR diff --git a/ci/test_cpp_common.sh b/ci/test_cpp_common.sh new file mode 100644 index 00000000000..c7c095dc4df --- /dev/null +++ b/ci/test_cpp_common.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +set -euo pipefail + +. 
/opt/conda/etc/profile.d/conda.sh + +rapids-logger "Generate C++ testing dependencies" +rapids-dependency-file-generator \ + --output conda \ + --file_key test_cpp \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n test + +# Temporarily allow unbound variables for conda activation. +set +u +conda activate test +set -u + +CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/ +mkdir -p "${RAPIDS_TESTS_DIR}" + +rapids-print-env + +rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + libcudf libcudf_kafka libcudf-tests + +rapids-logger "Check GPU usage" +nvidia-smi diff --git a/ci/test_cpp_memcheck.sh b/ci/test_cpp_memcheck.sh new file mode 100755 index 00000000000..0cad4fc3a3f --- /dev/null +++ b/ci/test_cpp_memcheck.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +source "$(dirname "$0")/test_cpp_common.sh" + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + +# Run gtests with compute-sanitizer +rapids-logger "Memcheck gtests with rmm_mode=cuda" +export GTEST_CUDF_RMM_MODE=cuda +COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck" +for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do + test_name=$(basename ${gt}) + if [[ "$test_name" == "ERROR_TEST" ]] || [[ "$test_name" == "STREAM_IDENTIFICATION_TEST" ]]; then + continue + fi + echo "Running compute-sanitizer on $test_name" + ${COMPUTE_SANITIZER_CMD} ${gt} --gtest_output=xml:"${RAPIDS_TESTS_DIR}${test_name}.xml" +done +unset GTEST_CUDF_RMM_MODE + +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 675df3891c3..44d6be65574 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -16,6 +16,7 @@ dependencies: - cmake>=3.23.1,!=3.25.0 - cubinlinker - cuda-python>=11.7.1,<12.0 +- cuda-sanitizer-api=11.8.86 - cudatoolkit=11.8 - cupy>=9.5.0,<12.0.0a0 - cxx-compiler diff --git a/dependencies.yaml b/dependencies.yaml index ae8eac4ea30..de9795b4b39 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -13,12 +13,14 @@ files: - notebooks - py_version - run + - test_cpp - test_python test_cpp: output: none includes: - cudatoolkit - libidentify_stream_usage_build + - test_cpp test_python: output: none includes: @@ -266,6 +268,16 @@ dependencies: arch: aarch64 packages: - cupy-cuda11x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works. + test_cpp: + specific: + - output_types: conda + matrices: + - matrix: + cuda: "11.8" + packages: + - cuda-sanitizer-api=11.8.86 + - matrix: + packages: test_java: common: - output_types: conda From 5719463d2db460e5cbbeb266512904dc1b475c2c Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Thu, 23 Feb 2023 09:12:50 -0600 Subject: [PATCH 06/60] Add docs build job (#12592) The PR adds a `docs_build` process to the PR and Build workflows for this repository. The generated docs are synced to s3 for only the build workflows. 
Authors: - Jake Awe (https://github.com/AyodeAwe) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12592 --- .github/workflows/build.yaml | 11 +++++++++ .github/workflows/pr.yaml | 11 +++++++++ .gitignore | 3 +++ ci/build_docs.sh | 47 ++++++++++++++++++++++++++++++++++++ dependencies.yaml | 13 +++++++--- docs/cudf/source/conf.py | 6 ----- 6 files changed, 82 insertions(+), 9 deletions(-) create mode 100755 ci/build_docs.sh diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index fa6704ef04e..024eb828e3c 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -53,6 +53,17 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} skip_upload_pkgs: libcudf-example + docs-build: + if: github.ref_type == 'branch' && github.event_name == 'push' + needs: python-build + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + with: + build_type: branch + node_type: "gpu-latest-1" + arch: "amd64" + container_image: "rapidsai/ci:latest" + run_script: "ci/build_docs.sh" wheel-build-cudf: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 73df2de20c2..952b58abda5 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -20,6 +20,7 @@ jobs: - conda-python-other-tests - conda-java-tests - conda-notebook-tests + - docs-build - wheel-build-cudf - wheel-tests-cudf - wheel-build-dask-cudf @@ -82,6 +83,16 @@ jobs: arch: "amd64" container_image: "rapidsai/ci:latest" run_script: "ci/test_notebooks.sh" + docs-build: + needs: conda-python-build + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + with: + build_type: pull-request + node_type: "gpu-latest-1" + arch: "amd64" + container_image: "rapidsai/ci:latest" + run_script: "ci/build_docs.sh" wheel-build-cudf: needs: checks secrets: inherit diff --git a/.gitignore b/.gitignore index 2d83aad7712..fb5c301fe3f 100644 --- a/.gitignore +++ b/.gitignore @@ -166,6 +166,9 @@ docs/cudf/source/api_docs/generated/* docs/cudf/source/api_docs/api/* docs/cudf/source/user_guide/example_output/* docs/cudf/source/user_guide/cudf.*Dtype.*.rst +_html +_text +jupyter_execute # cibuildwheel /wheelhouse diff --git a/ci/build_docs.sh b/ci/build_docs.sh new file mode 100755 index 00000000000..9551d98e9fe --- /dev/null +++ b/ci/build_docs.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +set -euo pipefail + +rapids-logger "Create test conda environment" +. 
/opt/conda/etc/profile.d/conda.sh + +rapids-dependency-file-generator \ + --output conda \ + --file_key docs \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n docs +conda activate docs + +rapids-print-env + +rapids-logger "Downloading artifacts from previous jobs" +CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) +VERSION_NUMBER=$(rapids-get-rapids-version-from-git) + +rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + --channel "${PYTHON_CHANNEL}" \ + libcudf cudf dask-cudf + + +rapids-logger "Build Doxygen docs" +pushd cpp/doxygen +aws s3 cp s3://rapidsai-docs/librmm/${VERSION_NUMBER}/html/rmm.tag . || echo "Failed to download rmm Doxygen tag" +doxygen Doxyfile +popd + +rapids-logger "Build Sphinx docs" +pushd docs/cudf +sphinx-build -b dirhtml source _html +sphinx-build -b text source _text +popd + + +if [[ ${RAPIDS_BUILD_TYPE} == "branch" ]]; then + aws s3 sync --delete cpp/doxygen/html "s3://rapidsai-docs/libcudf/${VERSION_NUMBER}/html" + aws s3 sync --delete docs/cudf/_html "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/html" + aws s3 sync --delete docs/cudf/_text "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/txt" +fi diff --git a/dependencies.yaml b/dependencies.yaml index de9795b4b39..ba6d240e069 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -9,7 +9,7 @@ files: - build - cudatoolkit - develop - - doc + - docs - notebooks - py_version - run @@ -43,6 +43,12 @@ files: includes: - develop - py_version + docs: + output: none + includes: + - cudatoolkit + - docs + - py_version channels: - rapidsai - rapidsai-nightly @@ -123,10 +129,11 @@ dependencies: - output_types: conda packages: - doxygen=1.8.20 # pre-commit hook needs a specific version. - doc: + docs: common: - - output_types: [conda, requirements] + - output_types: [conda] packages: + - doxygen=1.8.20 - myst-nb - nbsphinx - numpydoc diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 371a8b4e1c1..3d92d955263 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 # Copyright (c) 2018-2023, NVIDIA CORPORATION. # # cudf documentation build configuration file, created by @@ -23,11 +22,6 @@ from docutils.nodes import Text from sphinx.addnodes import pending_xref -import cudf - -sys.path.insert(0, os.path.abspath(cudf.__path__[0])) -sys.path.insert(0, os.path.abspath(".")) -sys.path.insert(0, os.path.abspath("../..")) sys.path.append(os.path.abspath("./_ext")) # -- General configuration ------------------------------------------------ From 430d91e9acb0a3db7c45df214e5158750311e626 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Thu, 23 Feb 2023 13:56:32 -0500 Subject: [PATCH 07/60] Shuffling read into a sub function in parquet read (#12809) This change is the first step toward the pipelined parquet reader and moves the chunk creation and file reads into another function. Right now, the operation is the same, but this change will allow for smaller groups to be read at a time for pipelining. 
Authors: - Mike Wilson (https://github.com/hyperbolic2346) Approvers: - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12809 --- cpp/src/io/parquet/reader_impl.hpp | 15 ++++++- cpp/src/io/parquet/reader_impl_preprocess.cu | 43 ++++++++++++-------- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index fcfea35f50c..8b86412ae63 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -130,10 +130,21 @@ class reader::impl { bool uses_custom_row_bounds, host_span const> row_group_indices); + /** + * @brief Create chunk information and start file reads + * + * @param row_groups_info vector of information about row groups to read + * @param num_rows Maximum number of rows to read + * @return pair of boolean indicating if compressed chunks were found and a vector of futures for + * read completion + */ + std::pair>> create_and_read_column_chunks( + cudf::host_span const row_groups_info, size_type num_rows); + /** * @brief Load and decompress the input file(s) into memory. */ - void load_and_decompress_data(std::vector const& row_groups_info, + void load_and_decompress_data(cudf::host_span const row_groups_info, size_type num_rows); /** diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index b1d013a96a3..0f55cd6e400 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -651,16 +651,11 @@ void reader::impl::allocate_nesting_info() page_nesting_decode_info.host_to_device(_stream); } -void reader::impl::load_and_decompress_data(std::vector const& row_groups_info, - size_type num_rows) +std::pair>> reader::impl::create_and_read_column_chunks( + cudf::host_span const row_groups_info, size_type num_rows) { - // This function should never be called if `num_rows == 0`. - CUDF_EXPECTS(num_rows > 0, "Number of reading rows must not be zero."); - - auto& raw_page_data = _file_itm_data.raw_page_data; - auto& decomp_page_data = _file_itm_data.decomp_page_data; - auto& chunks = _file_itm_data.chunks; - auto& pages_info = _file_itm_data.pages_info; + auto& raw_page_data = _file_itm_data.raw_page_data; + auto& chunks = _file_itm_data.chunks; // Descriptors for all the chunks that make up the selected columns const auto num_input_columns = _input_columns.size(); @@ -732,7 +727,7 @@ void reader::impl::load_and_decompress_data(std::vector const& r total_decompressed_size += col_meta.total_uncompressed_size; } } - remaining_rows -= row_group.num_rows; + remaining_rows -= row_group_rows; } // Read compressed chunk data to device memory @@ -745,12 +740,29 @@ void reader::impl::load_and_decompress_data(std::vector const& r chunk_source_map, _stream)); + CUDF_EXPECTS(remaining_rows == 0, "All rows data must be read."); + + return {total_decompressed_size > 0, std::move(read_rowgroup_tasks)}; +} + +void reader::impl::load_and_decompress_data( + cudf::host_span const row_groups_info, size_type num_rows) +{ + // This function should never be called if `num_rows == 0`. 
+ CUDF_EXPECTS(num_rows > 0, "Number of reading rows must not be zero."); + + auto& raw_page_data = _file_itm_data.raw_page_data; + auto& decomp_page_data = _file_itm_data.decomp_page_data; + auto& chunks = _file_itm_data.chunks; + auto& pages_info = _file_itm_data.pages_info; + + auto const [has_compressed_data, read_rowgroup_tasks] = + create_and_read_column_chunks(row_groups_info, num_rows); + for (auto& task : read_rowgroup_tasks) { task.wait(); } - CUDF_EXPECTS(remaining_rows <= 0, "All rows data must be read."); - // Process dataset chunk pages into output columns auto const total_pages = count_page_headers(chunks, _stream); pages_info = hostdevice_vector(total_pages, total_pages, _stream); @@ -758,14 +770,11 @@ void reader::impl::load_and_decompress_data(std::vector const& r if (total_pages > 0) { // decoding of column/page information decode_page_headers(chunks, pages_info, _stream); - if (total_decompressed_size > 0) { + if (has_compressed_data) { decomp_page_data = decompress_page_data(chunks, pages_info, _stream); // Free compressed data for (size_t c = 0; c < chunks.size(); c++) { - if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { - raw_page_data[c].reset(); - // TODO: Check if this is called - } + if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { raw_page_data[c].reset(); } } } From e64e26eda09f8508b7760ddba9f742c4f4e827cb Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Thu, 23 Feb 2023 18:07:56 -0800 Subject: [PATCH 08/60] Expose seed argument to hash_values (#12795) This PR exposes the `seed` param to `hash_values` that is already supported by libcudf's `hash` method. Closes #12775 Authors: - Ayush Dattagupta (https://github.com/ayushdg) Approvers: - https://github.com/brandon-b-miller URL: https://github.com/rapidsai/cudf/pull/12795 --- python/cudf/cudf/core/indexed_frame.py | 24 +++++++++++++-- python/cudf/cudf/tests/test_dataframe.py | 39 +++++++++++++++++++++--- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 43277fb55ff..2992cb005e5 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -1629,7 +1629,7 @@ def memory_usage(self, index=True, deep=False): """ raise NotImplementedError - def hash_values(self, method="murmur3"): + def hash_values(self, method="murmur3", seed=None): """Compute the hash of values in this column. Parameters @@ -1639,6 +1639,12 @@ def hash_values(self, method="murmur3"): * murmur3: MurmurHash3 hash function. * md5: MD5 hash function. + seed : int, optional + Seed value to use for the hash function. + Note - This only has effect for the following supported + hash functions: + * murmur3: MurmurHash3 hash function. + Returns ------- Series @@ -1665,6 +1671,11 @@ def hash_values(self, method="murmur3"): 1 947ca8d2c5f0f27437f156cfbfab0969 2 d0580ef52d27c043c8e341fd5039b166 dtype: object + >>> series.hash_values(method="murmur3", seed=42) + 0 2364453205 + 1 422621911 + 2 3353449140 + dtype: uint32 **DataFrame** @@ -1686,11 +1697,20 @@ def hash_values(self, method="murmur3"): 2 fe061786ea286a515b772d91b0dfcd70 dtype: object """ + seed_hash_methods = {"murmur3"} + if seed is None: + seed = 0 + elif method not in seed_hash_methods: + warnings.warn( + "Provided seed value has no effect for hash method" + f" `{method}`. 
Refer to the docstring for information" + " on hash methods that support the `seed` param" + ) # Note that both Series and DataFrame return Series objects from this # calculation, necessitating the unfortunate circular reference to the # child class here. return cudf.Series._from_data( - {None: libcudf.hash.hash([*self._columns], method)}, + {None: libcudf.hash.hash([*self._columns], method, seed)}, index=self.index, ) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 09b9f57356c..13f312f6f0c 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -38,6 +38,7 @@ NUMERIC_TYPES, assert_eq, assert_exceptions_equal, + assert_neq, does_not_raise, expect_warning_if, gen_rand, @@ -1323,9 +1324,10 @@ def test_assign(): @pytest.mark.parametrize("nrows", [1, 8, 100, 1000]) @pytest.mark.parametrize("method", ["murmur3", "md5"]) -def test_dataframe_hash_values(nrows, method): +@pytest.mark.parametrize("seed", [None, 42]) +def test_dataframe_hash_values(nrows, method, seed): gdf = cudf.DataFrame() - data = np.asarray(range(nrows)) + data = np.arange(nrows) data[0] = data[-1] # make first and last the same gdf["a"] = data gdf["b"] = gdf.a + 100 @@ -1334,12 +1336,41 @@ def test_dataframe_hash_values(nrows, method): assert len(out) == nrows assert out.dtype == np.uint32 + warning_expected = ( + True if seed is not None and method not in {"murmur3"} else False + ) # Check single column - out_one = gdf[["a"]].hash_values(method=method) + if warning_expected: + with pytest.warns( + UserWarning, match="Provided seed value has no effect*" + ): + out_one = gdf[["a"]].hash_values(method=method, seed=seed) + else: + out_one = gdf[["a"]].hash_values(method=method, seed=seed) # First matches last assert out_one.iloc[0] == out_one.iloc[-1] # Equivalent to the cudf.Series.hash_values() - assert_eq(gdf["a"].hash_values(method=method), out_one) + if warning_expected: + with pytest.warns( + UserWarning, match="Provided seed value has no effect*" + ): + assert_eq(gdf["a"].hash_values(method=method, seed=seed), out_one) + else: + assert_eq(gdf["a"].hash_values(method=method, seed=seed), out_one) + + +@pytest.mark.parametrize("method", ["murmur3"]) +def test_dataframe_hash_values_seed(method): + gdf = cudf.DataFrame() + data = np.arange(10) + data[0] = data[-1] # make first and last the same + gdf["a"] = data + gdf["b"] = gdf.a + 100 + out_one = gdf.hash_values(method=method, seed=0) + out_two = gdf.hash_values(method=method, seed=1) + assert out_one.iloc[0] == out_one.iloc[-1] + assert out_two.iloc[0] == out_two.iloc[-1] + assert_neq(out_one, out_two) @pytest.mark.parametrize("nrows", [3, 10, 100, 1000]) From 2e80eba6f75b03f039517c947f386ede65842a4c Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Fri, 24 Feb 2023 10:28:25 -0600 Subject: [PATCH 09/60] Fix parquet `RangeIndex` bug (#12838) Possible fix for https://github.com/rapidsai/cudf/issues/12837 Avoids dropping RangeIndex when `columns` argument is passed to `read_parquet` (unless `columns=[]`). 
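As a minimal illustration of the intended behavior (a sketch that mirrors the new test added below; the column names and index bounds are just example values):

```python
from io import BytesIO

import pandas as pd

import cudf

buffer = BytesIO()
df = cudf.DataFrame(
    {"a": [1, 2, 3], "b": ["x", "y", "z"]}, index=pd.RangeIndex(2, 5)
)
df.to_parquet(buffer)

# A non-empty `columns` selection should no longer drop the RangeIndex.
got = cudf.read_parquet(buffer, columns=["b", "a"])
assert isinstance(got.index, cudf.RangeIndex)
```

An empty list (`columns=[]`) remains the one case where the range index is still discarded.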
Authors:
  - Richard (Rick) Zamora (https://github.com/rjzamora)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12838
---
 python/cudf/cudf/_lib/parquet.pyx      |  2 +-
 python/cudf/cudf/tests/test_parquet.py | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index e5520ae1987..464d9243408 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -170,7 +170,7 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
     allow_range_index = True
     if columns is not None:
         cpp_columns.reserve(len(columns))
-        allow_range_index = False
+        allow_range_index = len(columns) > 0
         for col in columns:
             cpp_columns.push_back(str(col).encode())
         args.set_columns(cpp_columns)
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index ccd62729a9d..661497e4650 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -2650,6 +2650,20 @@ def test_parquet_columns_and_index_param(index, columns):
     assert_eq(expected, got, check_index_type=True)


+@pytest.mark.parametrize("columns", [None, ["b", "a"]])
+def test_parquet_columns_and_range_index(columns):
+    buffer = BytesIO()
+    df = cudf.DataFrame(
+        {"a": [1, 2, 3], "b": ["a", "b", "c"]}, index=pd.RangeIndex(2, 5)
+    )
+    df.to_parquet(buffer)
+
+    expected = pd.read_parquet(buffer, columns=columns)
+    got = cudf.read_parquet(buffer, columns=columns)
+
+    assert_eq(expected, got, check_index_type=True)
+
+
 def test_parquet_nested_struct_list():
     buffer = BytesIO()
     data = {

From 0e4e6dd567964404934d96a1fe8fc14b1d25a526 Mon Sep 17 00:00:00 2001
From: Divye Gala
Date: Fri, 24 Feb 2023 12:07:51 -0500
Subject: [PATCH 10/60] Add `always_nullable` flag to Dremel encoding (#12727)

Closes #12389 by fixing the bug described here
https://github.com/rapidsai/cudf/issues/12389#issuecomment-1419949751.

This flag, when `always_nullable=true`, generates `definition levels` in the Dremel encoding such that it considers every nested column and child to be `nullable`, even if they actually are not.

In the context of `two_table_comparators`, this helps us with producing consistently mapped `definition levels` in case there are some nested columns or children that are not nullable in either one or both of the tables.

This PR now exposes two APIs:
1. `cudf::detail::get_dremel_data(...)` : This API is consistent with standard Dremel encoding
2. 
`cudf::detail::get_comparator_data(...)` : This API modifies the definition levels in Dremel encoding to produce the effect described above Authors: - Divye Gala (https://github.com/divyegala) - Nghia Truong (https://github.com/ttnghia) Approvers: - Nghia Truong (https://github.com/ttnghia) - Yunsong Wang (https://github.com/PointKernel) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12727 --- cpp/include/cudf/lists/detail/dremel.hpp | 30 +++++++-- .../cudf/table/experimental/row_operators.cuh | 3 +- cpp/src/lists/dremel.cu | 48 ++++++++++---- cpp/src/table/row_operators.cu | 2 +- cpp/tests/search/search_list_test.cpp | 64 ++++++++++++++++++- 5 files changed, 124 insertions(+), 23 deletions(-) diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index 4e3aeec2499..d36a4091947 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -183,16 +183,34 @@ struct dremel_data { * - | - | -- | --- * ``` * - * @param col Column of LIST type - * @param level_nullability Pre-determined nullability at each list level. Empty means infer from - * `col` + * @param input Column of LIST type + * @param nullability Pre-determined nullability at each list level. Empty means infer from + * `input` + * @param output_as_byte_array if `true`, then any nested list level that has a child of type + * `uint8_t` will be considered as the last level * @param stream CUDA stream used for device memory operations and kernel launches. - * * @return A struct containing dremel data */ -dremel_data get_dremel_data(column_view h_col, +dremel_data get_dremel_data(column_view input, std::vector nullability, bool output_as_byte_array, rmm::cuda_stream_view stream); +/** + * @brief Get Dremel offsets, repetition levels, and modified definition levels to be used for + * lexicographical comparators. The modified definition levels are produced by treating + * each nested column in the input as nullable + * + * @param input Column of LIST type + * @param nullability Pre-determined nullability at each list level. Empty means infer from + * `input` + * @param output_as_byte_array if `true`, then any nested list level that has a child of type + * `uint8_t` will be considered as the last level + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @return A struct containing dremel data + */ +dremel_data get_comparator_data(column_view input, + std::vector nullability, + bool output_as_byte_array, + rmm::cuda_stream_view stream); } // namespace cudf::detail diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index f9ffbfcdf7b..2a207d2a5c4 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -487,7 +487,8 @@ class device_row_comparator { // element_index because either both rows have a deeply nested NULL at the // same position, and we'll "continue" in our iteration, or we will early // exit if only one of the rows has a deeply nested NULL - if (lcol.nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) { + if ((lcol.nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or + (rcol.nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) { ++element_index; } if (l_def_level == r_def_level) { continue; } diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu index 26988622aee..c96a21df905 100644 --- a/cpp/src/lists/dremel.cu +++ b/cpp/src/lists/dremel.cu @@ -35,7 +35,7 @@ #include namespace cudf::detail { - +namespace { /** * @brief Functor to get definition level value for a nested struct column until the leaf level or * the first list level. @@ -46,6 +46,7 @@ struct def_level_fn { uint8_t const* d_nullability; uint8_t sub_level_start; uint8_t curr_def_level; + bool always_nullable; __device__ uint32_t operator()(size_type i) { @@ -55,7 +56,7 @@ struct def_level_fn { auto col = *parent_col; do { // If col not nullable then it does not contribute to def levels - if (d_nullability[l]) { + if (always_nullable or d_nullability[l]) { if (not col.nullable() or bit_is_set(col.null_mask(), i)) { ++def; } else { // We have found the shallowest level at which this row is null @@ -72,10 +73,11 @@ struct def_level_fn { } }; -dremel_data get_dremel_data(column_view h_col, - std::vector nullability, - bool output_as_byte_array, - rmm::cuda_stream_view stream) +dremel_data get_encoding(column_view h_col, + std::vector nullability, + bool output_as_byte_array, + bool always_nullable, + rmm::cuda_stream_view stream) { auto get_list_level = [](column_view col) { while (col.type().id() == type_id::STRUCT) { @@ -173,14 +175,14 @@ dremel_data get_dremel_data(column_view h_col, uint32_t def = 0; start_at_sub_level.push_back(curr_nesting_level_idx); while (col.type().id() == type_id::STRUCT) { - def += (nullability[curr_nesting_level_idx]) ? 1 : 0; + def += (always_nullable or nullability[curr_nesting_level_idx]) ? 1 : 0; col = col.child(0); ++curr_nesting_level_idx; } // At the end of all those structs is either a list column or the leaf. List column contributes // at least one def level. Leaf contributes 1 level only if it is nullable. - def += - (col.type().id() == type_id::LIST ? 1 : 0) + (nullability[curr_nesting_level_idx] ? 1 : 0); + def += (col.type().id() == type_id::LIST ? 1 : 0) + + (always_nullable or nullability[curr_nesting_level_idx] ? 
1 : 0); def_at_level.push_back(def); ++curr_nesting_level_idx; }; @@ -209,7 +211,7 @@ dremel_data get_dremel_data(column_view h_col, } } - auto [device_view_owners, d_nesting_levels] = + [[maybe_unused]] auto [device_view_owners, d_nesting_levels] = contiguous_copy_column_device_views(nesting_levels, stream); auto max_def_level = def_at_level.back(); @@ -297,7 +299,8 @@ dremel_data get_dremel_data(column_view h_col, def_level_fn{d_nesting_levels + level, d_nullability.data(), start_at_sub_level[level], - def_at_level[level]}); + def_at_level[level], + always_nullable}); // `nesting_levels.size()` == no of list levels + leaf. Max repetition level = no of list levels auto input_child_rep_it = thrust::make_constant_iterator(nesting_levels.size() - 1); @@ -306,7 +309,8 @@ dremel_data get_dremel_data(column_view h_col, def_level_fn{d_nesting_levels + level + 1, d_nullability.data(), start_at_sub_level[level + 1], - def_at_level[level + 1]}); + def_at_level[level + 1], + always_nullable}); // Zip the input and output value iterators so that merge operation is done only once auto input_parent_zip_it = @@ -389,7 +393,8 @@ dremel_data get_dremel_data(column_view h_col, def_level_fn{d_nesting_levels + level, d_nullability.data(), start_at_sub_level[level], - def_at_level[level]}); + def_at_level[level], + always_nullable}); // Zip the input and output value iterators so that merge operation is done only once auto input_parent_zip_it = @@ -459,5 +464,22 @@ dremel_data get_dremel_data(column_view h_col, leaf_data_size, max_def_level}; } +} // namespace + +dremel_data get_dremel_data(column_view h_col, + std::vector nullability, + bool output_as_byte_array, + rmm::cuda_stream_view stream) +{ + return get_encoding(h_col, nullability, output_as_byte_array, false, stream); +} + +dremel_data get_comparator_data(column_view h_col, + std::vector nullability, + bool output_as_byte_array, + rmm::cuda_stream_view stream) +{ + return get_encoding(h_col, nullability, output_as_byte_array, true, stream); +} } // namespace cudf::detail diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 766a1b63905..8a63a6f6411 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -264,7 +264,7 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) std::vector dremel_device_views; for (auto const& col : table) { if (col.type().id() == type_id::LIST) { - dremel_data.push_back(detail::get_dremel_data(col, {}, false, stream)); + dremel_data.push_back(detail::get_comparator_data(col, {}, false, stream)); dremel_device_views.push_back(dremel_data.back()); } } diff --git a/cpp/tests/search/search_list_test.cpp b/cpp/tests/search/search_list_test.cpp index 1393095037d..1e97933fa4d 100644 --- a/cpp/tests/search/search_list_test.cpp +++ b/cpp/tests/search/search_list_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,7 +25,8 @@
 #include
 #include

-using namespace cudf::test::iterators;
+using cudf::test::iterators::null_at;
+using cudf::test::iterators::nulls_at;

 using bools_col = cudf::test::fixed_width_column_wrapper;
 using int32s_col = cudf::test::fixed_width_column_wrapper;
@@ -347,3 +348,62 @@ TYPED_TEST(TypedListContainsTestColumnNeedles, ListsOfStructs)
   auto const result = cudf::contains(*haystack, *needles);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result, verbosity);
 }
+
+struct ListLowerBound : public cudf::test::BaseFixture {
+};
+
+TEST_F(ListLowerBound, ListWithNulls)
+{
+  {
+    using lcw = cudf::test::lists_column_wrapper;
+    auto const haystack = lcw{
+      lcw{-3.45967821e+12},  // 0
+      lcw{-3.6912186e-32},   // 1
+      lcw{9.721175},         // 2
+    };
+
+    auto const needles = lcw{
+      lcw{{0, 4.22671e+32}, null_at(0)},
+    };
+
+    auto const expect = int32s_col{0};
+    auto const result = cudf::lower_bound(cudf::table_view{{haystack}},
+                                          cudf::table_view{{needles}},
+                                          {cudf::order::ASCENDING},
+                                          {cudf::null_order::BEFORE});
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result);
+  }
+
+  {
+    using lcw = cudf::test::lists_column_wrapper;
+    auto const col1 = lcw{
+      lcw{{0}, null_at(0)},  // 0
+      lcw{-80},              // 1
+      lcw{-17},              // 2
+    };
+
+    auto const col2 = lcw{
+      lcw{27},               // 0
+      lcw{{0}, null_at(0)},  // 1
+      lcw{},                 // 2
+    };
+
+    auto const val1 = lcw{
+      lcw{87},
+    };
+
+    auto const val2 = lcw{
+      lcw{},
+    };
+
+    cudf::table_view input{{col1, col2}};
+    cudf::table_view values{{val1, val2}};
+    std::vector column_order{cudf::order::ASCENDING, cudf::order::DESCENDING};
+    std::vector null_order_flags{cudf::null_order::BEFORE,
+                                 cudf::null_order::BEFORE};
+
+    auto const expect = int32s_col{3};
+    auto const result = cudf::lower_bound(input, values, column_order, null_order_flags);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result);
+  }
+}

From 8a7fb2f14a73937d31f648a65f57bc47751e97c1 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR
Date: Fri, 24 Feb 2023 12:25:49 -0600
Subject: [PATCH 11/60] Deprecate `inplace` parameters in categorical methods (#12824)

To get ready for pandas-2.0 compatibility, this PR deprecates `inplace` in the following APIs:

- [x] `as_ordered`
- [x] `as_unordered`
- [x] `add_categories`
- [x] `remove_categories`
- [x] `set_categories`
- [x] `reorder_categories`

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - https://github.com/brandon-b-miller

URL: https://github.com/rapidsai/cudf/pull/12824
---
 python/cudf/cudf/core/column/categorical.py | 78 ++++++++++++++++++++-
 python/cudf/cudf/tests/test_categorical.py  | 19 +++--
 2 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index a1526d25512..52f7c0b957f 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -141,6 +141,13 @@ def as_ordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]:
             or return a copy of this categorical with
             added categories.

+        .. deprecated:: 23.02
+
+            The `inplace` parameter is deprecated and
+            will be removed in a future version of cudf.
+            Setting categories as ordered will always
+            return a new Categorical object.
+
         Returns
         -------
         Categorical
@@ -204,6 +211,13 @@ def as_unordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]:
             in-place or return a copy of this categorical
             with ordered set to False.

+        .. deprecated:: 23.02
+
+            The `inplace` parameter is deprecated and
+            will be removed in a future version of cudf.
+            Setting categories as unordered will always
+            return a new Categorical object.
+
         Returns
         -------
         Categorical
@@ -286,6 +300,13 @@ def add_categories(
             or return a copy of this categorical with
             added categories.

+        .. deprecated:: 23.04
+
+            The `inplace` parameter is deprecated and
+            will be removed in a future version of cudf.
+            Adding categories will always return a
+            new Categorical object.
+
         Returns
         -------
         cat
@@ -318,7 +339,14 @@ def add_categories(
         dtype: category
         Categories (5, int64): [1, 2, 0, 3, 4]
         """
-
+        if inplace:
+            warnings.warn(
+                "The `inplace` parameter in cudf.Series.cat.add_categories "
+                "is deprecated and will be removed in a future version of "
+                "cudf. Adding categories will always return a new "
+                "Categorical object.",
+                FutureWarning,
+            )
         old_categories = self._column.categories
         new_categories = column.as_column(
             new_categories,
@@ -371,6 +399,13 @@ def remove_categories(
             inplace or return a copy of this categorical
             with removed categories.

+        .. deprecated:: 23.04
+
+            The `inplace` parameter is deprecated and
+            will be removed in a future version of cudf.
+            Removing categories will always return a
+            new Categorical object.
+
         Returns
         -------
         cat
@@ -423,6 +458,16 @@ def remove_categories(
         dtype: category
         Categories (2, int64): [1, 2]
         """
+        if inplace:
+            warnings.warn(
+                "The `inplace` parameter in "
+                "cudf.Series.cat.remove_categories is deprecated and "
+                "will be removed in a future version of cudf. "
+                "Removing categories will always return a new "
+                "Categorical object.",
+                FutureWarning,
+            )
+
         cats = self.categories.to_series()
         removals = cudf.Series(removals, dtype=cats.dtype)
         removals_mask = removals.isin(cats)
@@ -485,6 +530,13 @@ def set_categories(
             or return a copy of this categorical with
             reordered categories.

+        .. deprecated:: 23.04
+
+            The `inplace` parameter is deprecated and
+            will be removed in a future version of cudf.
+            Setting categories will always return a
+            new Categorical object.
+
         Returns
         -------
         cat
@@ -524,6 +576,14 @@ def set_categories(
         dtype: category
         Categories (2, int64): [1, 10]
         """
+        if inplace:
+            warnings.warn(
+                "The `inplace` parameter in cudf.Series.cat.set_categories is "
+                "deprecated and will be removed in a future version of cudf. "
+                "Setting categories will always return a new Categorical "
+                "object.",
+                FutureWarning,
+            )
         return self._return_or_inplace(
             self._column.set_categories(
                 new_categories=new_categories, ordered=ordered, rename=rename
@@ -556,6 +616,13 @@ def reorder_categories(
             inplace or return a copy of this categorical
             with reordered categories.

+        .. deprecated:: 23.04
+
+            The `inplace` parameter is deprecated and
+            will be removed in a future version of cudf.
+            Reordering categories will always return a
+            new Categorical object.
+
         Returns
         -------
         cat
@@ -597,6 +664,15 @@ def reorder_categories(
             ValueError: items in new_categories are not the same as in
             old categories
         """
+        if inplace:
+            warnings.warn(
+                "The `inplace` parameter in "
+                "cudf.Series.cat.reorder_categories is deprecated "
+                "and will be removed in a future version of cudf. 
" + "Reordering categories will always return a new " + "Categorical object.", + FutureWarning, + ) return self._return_or_inplace( self._column.reorder_categories(new_categories, ordered=ordered), inplace=inplace, diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index fa8981cf7e3..496039ca2f8 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -443,10 +443,13 @@ def test_categorical_reorder_categories( "reorder_categories" ): pd_sr_1 = pd_sr.cat.reorder_categories(list("cba"), **kwargs) - cd_sr_1 = cd_sr.cat.reorder_categories(list("cba"), **kwargs) if inplace: + with pytest.warns(FutureWarning): + cd_sr_1 = cd_sr.cat.reorder_categories(list("cba"), **kwargs) pd_sr_1 = pd_sr cd_sr_1 = cd_sr + else: + cd_sr_1 = cd_sr.cat.reorder_categories(list("cba"), **kwargs) assert_eq(pd_sr_1, cd_sr_1) @@ -479,10 +482,14 @@ def test_categorical_add_categories(pd_str_cat, inplace): "add_categories" ): pd_sr_1 = pd_sr.cat.add_categories(["d"], inplace=inplace) - cd_sr_1 = cd_sr.cat.add_categories(["d"], inplace=inplace) + if inplace: + with pytest.warns(FutureWarning): + cd_sr_1 = cd_sr.cat.add_categories(["d"], inplace=inplace) pd_sr_1 = pd_sr cd_sr_1 = cd_sr + else: + cd_sr_1 = cd_sr.cat.add_categories(["d"], inplace=inplace) assert "d" in pd_sr_1.cat.categories.to_list() assert "d" in cd_sr_1.cat.categories.to_pandas().to_list() @@ -516,10 +523,14 @@ def test_categorical_remove_categories(pd_str_cat, inplace): "remove_categories" ): pd_sr_1 = pd_sr.cat.remove_categories(["a"], inplace=inplace) - cd_sr_1 = cd_sr.cat.remove_categories(["a"], inplace=inplace) + if inplace: + with pytest.warns(FutureWarning): + cd_sr_1 = cd_sr.cat.remove_categories(["a"], inplace=inplace) pd_sr_1 = pd_sr cd_sr_1 = cd_sr + else: + cd_sr_1 = cd_sr.cat.remove_categories(["a"], inplace=inplace) assert "a" not in pd_sr_1.cat.categories.to_list() assert "a" not in cd_sr_1.cat.categories.to_pandas().to_list() @@ -529,7 +540,7 @@ def test_categorical_remove_categories(pd_str_cat, inplace): # test using ordered operators with _hide_deprecated_pandas_categorical_inplace_warnings( "remove_categories" - ): + ) as _, pytest.warns(FutureWarning) as _: assert_exceptions_equal( lfunc=cd_sr.to_pandas().cat.remove_categories, rfunc=cd_sr.cat.remove_categories, From 54ee14e36157fe63d0eb58ed7ac8bafc2b1e4932 Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Fri, 24 Feb 2023 19:37:29 +0100 Subject: [PATCH 12/60] Update datasets download URL (#12840) Update datasets download URL to reduce latency and costs Authors: - Jordan Jacobelli (https://github.com/jjacobelli) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/12840 --- python/cudf/cudf/benchmarks/get_datasets.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/benchmarks/get_datasets.py b/python/cudf/cudf/benchmarks/get_datasets.py index f3b66eda512..7090539bcb0 100644 --- a/python/cudf/cudf/benchmarks/get_datasets.py +++ b/python/cudf/cudf/benchmarks/get_datasets.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
 import argparse
 import os

@@ -9,10 +9,7 @@
 Dataset = namedtuple("Dataset", ["url", "dir"])
 datasets = {
     "cuio_dataset": Dataset(
-        (
-            "https://rapidsai-data.s3.us-east-2.amazonaws.com/cudf/"
-            "benchmark/avro_json_datasets.zip"
-        ),
+        "https://data.rapids.ai/cudf/benchmark/avro_json_datasets.zip",
         "cudf/benchmarks/cuio_data/",
     ),
 }

From 12e4501c49daac3d0e3837a3f65078e63e20b904 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Fri, 24 Feb 2023 13:42:49 -0500
Subject: [PATCH 13/60] Remove KAFKA_HOST_TEST from compute-sanitizer check (#12831)

Removes the `KAFKA_HOST_TEST` from the compute-sanitizer memcheck nightly runs.
The following error occurs when running this host test.
```
Running compute-sanitizer on KAFKA_HOST_TEST
========= COMPUTE-SANITIZER
Running main() from gmock_main.cc
[==========] Running 2 tests from 1 test suite.
[----------] Global test environment set-up.
[----------] 2 tests from KafkaDatasourceTest
[ RUN ] KafkaDatasourceTest.MissingGroupID
[ OK ] KafkaDatasourceTest.MissingGroupID (0 ms)
[ RUN ] KafkaDatasourceTest.InvalidConfigValues
[ OK ] KafkaDatasourceTest.InvalidConfigValues (0 ms)
[----------] 2 tests from KafkaDatasourceTest (0 ms total)
[----------] Global test environment tear-down
[==========] 2 tests from 1 test suite ran. (0 ms total)
[ PASSED ] 2 tests.
========= Error: Target application terminated before first instrumented API call
========= Tracking kernels launched by child processes requires the --target-processes all option.
```
Adding the `--target-processes all` option gives the same error.
Disabling the check of this test since it is a host test that checks error conditions and does not appear to make any device calls.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/12831
---
 ci/test_cpp_memcheck.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/test_cpp_memcheck.sh b/ci/test_cpp_memcheck.sh
index 0cad4fc3a3f..db9ce143d51 100755
--- a/ci/test_cpp_memcheck.sh
+++ b/ci/test_cpp_memcheck.sh
@@ -11,7 +11,7 @@ set +e
 rapids-logger "Memcheck gtests with rmm_mode=cuda"
 export GTEST_CUDF_RMM_MODE=cuda
 COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck"
-for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do
+for gt in "$CONDA_PREFIX"/bin/gtests/libcudf/* ; do
     test_name=$(basename ${gt})
     if [[ "$test_name" == "ERROR_TEST" ]] || [[ "$test_name" == "STREAM_IDENTIFICATION_TEST" ]]; then
         continue

From 77c2e03ec572527b5c5c7a3f7a48b0cabd29abde Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani
Date: Fri, 24 Feb 2023 10:47:44 -0800
Subject: [PATCH 14/60] Consolidate linter configs into pyproject.toml (#12834)

This consolidation allows us to get rid of now unnecessary setup.cfg files (thanks to removing versioneer in #12741). It also allows us to move towards a fully pyproject.toml-driven build.
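Part of what makes this consolidation possible is that each tool reads its settings from its own `[tool.<name>]` table in `pyproject.toml`, so many configs can coexist in one file. A rough sketch of that lookup (illustrative only; it uses the stdlib `tomllib` from Python 3.11+, not what any of these linters literally do internally):

```python
import tomllib  # Python 3.11+; the third-party `tomli` package works similarly

with open("pyproject.toml", "rb") as f:
    config = tomllib.load(f)

# Each tool owns its own [tool.<name>] table, e.g. [tool.isort] or [tool.mypy].
isort_settings = config.get("tool", {}).get("isort", {})
print(isort_settings.get("line_length"))
```

Tools that do not read `pyproject.toml` (notably `flake8`) keep a dedicated config file, which is why a top-level `.flake8` is added below.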
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - David Wendt (https://github.com/davidwendt) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/12834 --- .flake8 | 24 +++++++ .pre-commit-config.yaml | 12 ++-- ci/release/update-version.sh | 2 +- cpp/benchmarks/common/generate_input.cu | 4 +- cpp/benchmarks/common/generate_input.hpp | 6 +- .../developer_guide/contributing_guide.md | 8 +-- pyproject.toml | 38 +++++++++++ python/cudf/cudf/_lib/utils.pyx | 4 +- python/cudf/pyproject.toml | 43 +++++++++++++ python/cudf/setup.cfg | 32 ---------- python/cudf_kafka/pyproject.toml | 46 +++++++++++++ python/cudf_kafka/setup.cfg | 35 ---------- python/custreamz/pyproject.toml | 45 +++++++++++++ python/custreamz/setup.cfg | 34 ---------- python/dask_cudf/pyproject.toml | 45 +++++++++++++ python/dask_cudf/setup.cfg | 31 --------- setup.cfg | 64 ------------------- 17 files changed, 261 insertions(+), 212 deletions(-) create mode 100644 .flake8 delete mode 100644 python/cudf/setup.cfg delete mode 100644 python/cudf_kafka/setup.cfg delete mode 100644 python/custreamz/setup.cfg delete mode 100644 setup.cfg diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000000..e80e3afc443 --- /dev/null +++ b/.flake8 @@ -0,0 +1,24 @@ +# Copyright (c) 2017-2023, NVIDIA CORPORATION. + +[flake8] +filename = *.py, *.pyx, *.pxd, *.pxi +exclude = __init__.py, *.egg, build, docs, .git +force-check = True +ignore = + # line break before binary operator + W503, + # whitespace before : + E203 +per-file-ignores = + # Rules ignored only in Cython: + # E211: whitespace before '(' (used in multi-line imports) + # E225: Missing whitespace around operators (breaks cython casting syntax like ) + # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) + # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) + # E275: Missing whitespace after keyword (Doesn't work with Cython except?) 
+ # E402: invalid syntax (works for Python, not Cython) + # E999: invalid syntax (works for Python, not Cython) + # W504: line break after binary operator (breaks lines that end with a pointer) + *.pyx: E211, E225, E226, E227, E275, E402, E999, W504 + *.pxd: E211, E225, E226, E227, E275, E402, E999, W504 + *.pxi: E211, E225, E226, E227, E275, E402, E999, W504 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 244fc0d3872..e252af717ce 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: rev: 5.0.4 hooks: - id: flake8 - args: ["--config=setup.cfg"] + args: ["--config=.flake8"] files: python/.*$ types: [file] types_or: [python, cython] @@ -48,7 +48,7 @@ repos: hooks: - id: mypy additional_dependencies: [types-cachetools] - args: ["--config-file=setup.cfg", + args: ["--config-file=pyproject.toml", "python/cudf/cudf", "python/custreamz/custreamz", "python/cudf_kafka/cudf_kafka", @@ -58,7 +58,9 @@ repos: rev: 6.1.1 hooks: - id: pydocstyle - args: ["--config=setup.cfg"] + # https://github.com/PyCQA/pydocstyle/issues/603 + additional_dependencies: [toml] + args: ["--config=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-clang-format rev: v11.1.0 hooks: @@ -138,9 +140,11 @@ repos: pass_filenames: false verbose: false - repo: https://github.com/codespell-project/codespell - rev: v2.1.0 + rev: v2.2.2 hooks: - id: codespell + additional_dependencies: [tomli] + args: ["--toml", "pyproject.toml"] exclude: | (?x)^( .*test.*| diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index c8875fda641..831b91bb2a6 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -94,7 +94,7 @@ sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/setup sed_runner "s/cudf==.*\",/cudf==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/setup.py # Dependency versions in pyproject.toml -sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/pyproject.toml +sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/pyproject.toml for FILE in .github/workflows/*.yaml; do sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index dee7e2b8586..2829d14070c 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -501,7 +501,7 @@ std::unique_ptr create_random_utf8_string_column(data_profile cons rmm::device_uvector offsets(num_rows + 1, cudf::get_default_stream()); thrust::exclusive_scan( thrust::device, valid_lengths, valid_lengths + lengths.size(), offsets.begin()); - // offfsets are ready. + // offsets are ready. auto chars_length = *thrust::device_pointer_cast(offsets.end() - 1); rmm::device_uvector chars(chars_length, cudf::get_default_stream()); thrust::for_each_n(thrust::device, diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp index f8ea194f0c4..e65aa69763b 100644 --- a/cpp/benchmarks/common/generate_input.hpp +++ b/cpp/benchmarks/common/generate_input.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -373,13 +373,13 @@ class data_profile { void set_bool_probability_true(double p) { - CUDF_EXPECTS(p >= 0. and p <= 1., "probablity must be in range [0...1]"); + CUDF_EXPECTS(p >= 0. and p <= 1., "probability must be in range [0...1]"); bool_probability_true = p; } void set_null_probability(std::optional p) { CUDF_EXPECTS(p.value_or(0.) >= 0. and p.value_or(0.) <= 1., - "probablity must be in range [0...1]"); + "probability must be in range [0...1]"); null_probability = p; } void set_cardinality(cudf::size_type c) { cardinality = c; } diff --git a/docs/cudf/source/developer_guide/contributing_guide.md b/docs/cudf/source/developer_guide/contributing_guide.md index 34071f44914..bb3479cf4c1 100644 --- a/docs/cudf/source/developer_guide/contributing_guide.md +++ b/docs/cudf/source/developer_guide/contributing_guide.md @@ -22,16 +22,16 @@ Specifically, cuDF uses the following tools: In conjunction with [type hints](https://docs.python.org/3/library/typing.html), `mypy` can help catch various bugs that are otherwise difficult to find. - [`pydocstyle`](https://github.com/PyCQA/pydocstyle/) lints docstring style. +- [`codespell`](https://github.com/codespell-project/codespell) finds spelling errors. Linter config data is stored in a number of files. -We generally use `pyproject.toml` over `setup.cfg` and avoid project-specific files (e.g. `setup.cfg` > `python/cudf/setup.cfg`). +We generally use `pyproject.toml` over `setup.cfg` and avoid project-specific files (e.g. `pyproject.toml` > `python/cudf/pyproject.toml`). However, differences between tools and the different packages in the repo result in the following caveats: -- `flake8` has no plans to support `pyproject.toml`, so it must live in `setup.cfg`. +- `flake8` has no plans to support `pyproject.toml`, so it must live in `.flake8`. - `isort` must be configured per project to set which project is the "first party" project. -Additionally, our use of `versioneer` means that each project must have a `setup.cfg`. -As a result, we currently maintain both root and project-level `pyproject.toml` and `setup.cfg` files. +As a result, we currently maintain both root and project-level `pyproject.toml` files as well as a `.flake8` file. For more information on how to use pre-commit hooks, see the code formatting section of the [overall contributing guide](https://github.com/rapidsai/cudf/blob/main/CONTRIBUTING.md#python--pre-commit-hooks). diff --git a/pyproject.toml b/pyproject.toml index dfd22f33785..3940d9119ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,3 +17,41 @@ force-exclude = ''' dist )/ ''' + +[tool.pydocstyle] +# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather +# than include using match-dir. Note that as discussed in +# https://stackoverflow.com/questions/65478393/how-to-filter-directories-using-the-match-dir-flag-for-pydocstyle, +# unlike the match option above this match-dir will have no effect when +# pydocstyle is invoked from pre-commit. Therefore this exclusion list must +# also be maintained in the pre-commit config file. 
+match-dir = "^(?!(ci|cpp|conda|docs|java|notebooks)).*$" +# Allow missing docstrings for docutils +ignore-decorators = ".*(docutils|doc_apply|copy_docstring).*" +select = "D201, D204, D206, D207, D208, D209, D210, D211, D214, D215, D300, D301, D302, D403, D405, D406, D407, D408, D409, D410, D411, D412, D414, D418" + # Would like to enable the following rules in the future: + # D200, D202, D205, D400 + +[tool.mypy] +ignore_missing_imports = true +# If we don't specify this, then mypy will check excluded files if +# they are imported by a checked file. +follow_imports = "skip" +exclude = [ + "cudf/_lib/", + "cudf/cudf/benchmarks/", + "cudf/cudf/tests/", + "cudf/cudf/utils/metadata/orc_column_statistics_pb2.py", + "custreamz/custreamz/tests/", + "dask_cudf/dask_cudf/tests/", + ] + +[tool.codespell] +# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - +# this is only to allow you to run codespell interactively +skip = "./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,./cpp/tests,./python/cudf/cudf/tests,./java/src/test,./cpp/include/cudf_test/cxxopts.hpp" +# ignore short words, and typename parameters like OffsetT +ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" +ignore-words-list = "inout,unparseable,falsy" +builtin = "clear" +quiet-level = 3 diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 5f4d3e17fbc..56918799cca 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. import numpy as np import pyarrow as pa @@ -315,7 +315,7 @@ cdef columns_from_table_view( object owners, ): """ - Given a ``cudf::table_view``, construsts a list of columns from it, + Given a ``cudf::table_view``, constructs a list of columns from it, along with referencing an owner Python object that owns the memory lifetime. owner must be either None or a list of column. If owner is a list of columns, the owner of the `i`th ``cudf::column_view`` diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 49c4d83245f..305e8822030 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -15,3 +15,46 @@ requires = [ "protoc-wheel", "rmm==23.4.*", ] + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true +known_dask = [ + "dask", + "distributed", + "dask_cuda", +] +known_rapids = [ + "rmm", +] +known_first_party = [ + "cudf", +] +default_section = "THIRDPARTY" +sections = [ + "FUTURE", + "STDLIB", + "THIRDPARTY", + "DASK", + "RAPIDS", + "FIRSTPARTY", + "LOCALFOLDER", +] +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", + "__init__.py", +] diff --git a/python/cudf/setup.cfg b/python/cudf/setup.cfg deleted file mode 100644 index 8380da371f9..00000000000 --- a/python/cudf/setup.cfg +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. 
- -[isort] -line_length=79 -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -order_by_type=True -known_dask= - dask - distributed - dask_cuda -known_rapids= - rmm -known_first_party= - cudf -default_section=THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,DASK,RAPIDS,FIRSTPARTY,LOCALFOLDER -skip= - thirdparty - .eggs - .git - .hg - .mypy_cache - .tox - .venv - _build - buck-out - build - dist - __init__.py diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 0924fc90352..308a7869bc0 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -7,3 +7,49 @@ requires = [ "setuptools", "cython>=0.29,<0.30", ] + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true +known_dask = [ + "dask", + "distributed", + "dask_cuda", + "streamz", +] +known_rapids = [ + "rmm", + "cudf", + "dask_cudf", +] +known_first_party = [ + "cudf_kafka", +] +default_section = "THIRDPARTY" +sections = [ + "FUTURE", + "STDLIB", + "THIRDPARTY", + "DASK", + "RAPIDS", + "FIRSTPARTY", + "LOCALFOLDER", +] +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", + "__init__.py", +] diff --git a/python/cudf_kafka/setup.cfg b/python/cudf_kafka/setup.cfg deleted file mode 100644 index ee0d783b184..00000000000 --- a/python/cudf_kafka/setup.cfg +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. - -[isort] -line_length=79 -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -order_by_type=True -known_dask= - dask - distributed - dask_cuda - streamz -known_rapids= - rmm - cudf - dask_cudf -known_first_party= - cudf_kafka -default_section=THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,DASK,RAPIDS,FIRSTPARTY,LOCALFOLDER -skip= - thirdparty - .eggs - .git - .hg - .mypy_cache - .tox - .venv - _build - buck-out - build - dist - __init__.py diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index 806848c356e..d5c41945482 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -6,3 +6,48 @@ requires = [ "wheel", "setuptools", ] + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true +known_dask = [ + "dask", + "distributed", + "dask_cuda", +] +known_rapids = [ + "rmm", + "cudf", + "dask_cudf", +] +known_first_party = [ + "streamz", +] +default_section = "THIRDPARTY" +sections = [ + "FUTURE", + "STDLIB", + "THIRDPARTY", + "DASK", + "RAPIDS", + "FIRSTPARTY", + "LOCALFOLDER", +] +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", + "__init__.py", +] diff --git a/python/custreamz/setup.cfg b/python/custreamz/setup.cfg deleted file mode 100644 index 8c038db9349..00000000000 --- a/python/custreamz/setup.cfg +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
- -[isort] -line_length=79 -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -order_by_type=True -known_dask= - dask - distributed - dask_cuda -known_rapids= - rmm - cudf - dask_cudf -known_first_party= - streamz -default_section=THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,DASK,RAPIDS,FIRSTPARTY,LOCALFOLDER -skip= - thirdparty - .eggs - .git - .hg - .mypy_cache - .tox - .venv - _build - buck-out - build - dist - __init__.py diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 806848c356e..8cf823d4291 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -6,3 +6,48 @@ requires = [ "wheel", "setuptools", ] + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true + +known_dask = [ + "dask", + "distributed", + "dask_cuda", +] +known_rapids = [ + "rmm", + "cudf", +] +known_first_party = [ + "dask_cudf", +] + +default_section = "THIRDPARTY" +sections = [ + "FUTURE", + "STDLIB", + "THIRDPARTY", + "DASK", + "RAPIDS", + "FIRSTPARTY", + "LOCALFOLDER", +] +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", +] diff --git a/python/dask_cudf/setup.cfg b/python/dask_cudf/setup.cfg index 66f4b8891d0..8139b3c7dc6 100644 --- a/python/dask_cudf/setup.cfg +++ b/python/dask_cudf/setup.cfg @@ -1,36 +1,5 @@ # Copyright (c) 2020-2023, NVIDIA CORPORATION. -[isort] -line_length=79 -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -order_by_type=True -known_dask= - dask - distributed - dask_cuda -known_rapids= - rmm - cudf -known_first_party= - dask_cudf -default_section=THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,DASK,RAPIDS,FIRSTPARTY,LOCALFOLDER -skip= - thirdparty - .eggs - .git - .hg - .mypy_cache - .tox - .venv - _build - buck-out - build - dist - [options.entry_points] dask.dataframe.backends = cudf = dask_cudf.backends:CudfBackendEntrypoint diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 962b7d73bbe..00000000000 --- a/setup.cfg +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2017-2023, NVIDIA CORPORATION. - -[flake8] -filename = *.py, *.pyx, *.pxd, *.pxi -exclude = __init__.py, *.egg, build, docs, .git -force-check = True -ignore = - # line break before binary operator - W503, - # whitespace before : - E203 -per-file-ignores = - # Rules ignored only in Cython: - # E211: whitespace before '(' (used in multi-line imports) - # E225: Missing whitespace around operators (breaks cython casting syntax like ) - # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) - # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) - # E275: Missing whitespace after keyword (Doesn't work with Cython except?) - # E402: invalid syntax (works for Python, not Cython) - # E999: invalid syntax (works for Python, not Cython) - # W504: line break after binary operator (breaks lines that end with a pointer) - *.pyx: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxd: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxi: E211, E225, E226, E227, E275, E402, E999, W504 - -[pydocstyle] -# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather -# than include using match-dir. 
Note that as discussed in -# https://stackoverflow.com/questions/65478393/how-to-filter-directories-using-the-match-dir-flag-for-pydocstyle, -# unlike the match option above this match-dir will have no effect when -# pydocstyle is invoked from pre-commit. Therefore this exclusion list must -# also be maintained in the pre-commit config file. -match-dir = ^(?!(ci|cpp|conda|docs|java|notebooks)).*$ -# Allow missing docstrings for docutils -ignore-decorators = .*(docutils|doc_apply|copy_docstring).* -select = - D201, D204, D206, D207, D208, D209, D210, D211, D214, D215, D300, D301, D302, D403, D405, D406, D407, D408, D409, D410, D411, D412, D414, D418 - # Would like to enable the following rules in the future: - # D200, D202, D205, D400 - -[mypy] -ignore_missing_imports = True -# If we don't specify this, then mypy will check excluded files if -# they are imported by a checked file. -follow_imports = skip -exclude = (?x)( - cudf/_lib/ - | cudf/cudf/benchmarks/ - | cudf/cudf/tests/ - | cudf/cudf/utils/metadata/orc_column_statistics_pb2.py - | custreamz/custreamz/tests/ - | dask_cudf/dask_cudf/tests/ - # This close paren cannot be in column zero otherwise the config parser barfs - ) - -[codespell] -# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - -# this is only to allow you to run codespell interactively -skip = ./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,./cpp/tests,./python/cudf/cudf/tests,./java/src/test,./cpp/include/cudf_test/cxxopts.hpp -# ignore short words, and typename parameters like OffsetT -ignore-regex = \b(.{1,4}|[A-Z]\w*T)\b -ignore-words-list = inout,unparseable -builtin = clear -quiet-level = 3 From 4f2f37987fbd66de0cc9116734d2094ca4a39948 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 24 Feb 2023 17:04:59 -0600 Subject: [PATCH 15/60] Enable nbqa pre-commit hooks for isort and black. (#12848) This enables `black` and `isort` linters for ipynb notebooks via [nbqa](https://github.com/nbQA-dev/nbQA). I propose this change to avoid manually linting notebooks like https://github.com/rapidsai/cudf/pull/12595. cc: @galipremsagar Authors: - Bradley Dice (https://github.com/bdice) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/12848 --- .pre-commit-config.yaml | 10 ++ docs/cudf/source/user_guide/10min.ipynb | 1 + .../cudf/source/user_guide/cupy-interop.ipynb | 34 ++-- .../source/user_guide/guide-to-udfs.ipynb | 149 +++++++++--------- .../cudf/source/user_guide/missing-data.ipynb | 56 ++++--- 5 files changed, 141 insertions(+), 109 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e252af717ce..a030f3bd25b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -61,6 +61,16 @@ repos: # https://github.com/PyCQA/pydocstyle/issues/603 additional_dependencies: [toml] args: ["--config=pyproject.toml"] + - repo: https://github.com/nbQA-dev/nbQA + rev: 1.6.3 + hooks: + - id: nbqa-isort + # Use the cudf_kafka isort orderings in notebooks so that dask + # and RAPIDS packages have their own sections. + args: ["--settings-file=python/cudf_kafka/pyproject.toml"] + - id: nbqa-black + # Explicitly specify the pyproject.toml at the repo root, not per-project. 
+ args: ["--config=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-clang-format rev: v11.1.0 hooks: diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb index af938b79a29..0352c624e04 100644 --- a/docs/cudf/source/user_guide/10min.ipynb +++ b/docs/cudf/source/user_guide/10min.ipynb @@ -35,6 +35,7 @@ "\n", "import cupy as cp\n", "import pandas as pd\n", + "\n", "import cudf\n", "import dask_cudf\n", "\n", diff --git a/docs/cudf/source/user_guide/cupy-interop.ipynb b/docs/cudf/source/user_guide/cupy-interop.ipynb index 3e169984ace..c98a4ddea23 100644 --- a/docs/cudf/source/user_guide/cupy-interop.ipynb +++ b/docs/cudf/source/user_guide/cupy-interop.ipynb @@ -18,9 +18,10 @@ "outputs": [], "source": [ "import timeit\n", - "from packaging import version\n", "\n", "import cupy as cp\n", + "from packaging import version\n", + "\n", "import cudf\n", "\n", "if version.parse(cp.__version__) >= version.parse(\"10.0.0\"):\n", @@ -63,10 +64,13 @@ ], "source": [ "nelem = 10000\n", - "df = cudf.DataFrame({'a':range(nelem),\n", - " 'b':range(500, nelem + 500),\n", - " 'c':range(1000, nelem + 1000)}\n", - " )\n", + "df = cudf.DataFrame(\n", + " {\n", + " \"a\": range(nelem),\n", + " \"b\": range(500, nelem + 500),\n", + " \"c\": range(1000, nelem + 1000),\n", + " }\n", + ")\n", "\n", "%timeit arr_cupy = cupy_from_dlpack(df.to_dlpack())\n", "%timeit arr_cupy = df.values\n", @@ -138,7 +142,7 @@ } ], "source": [ - "col = 'a'\n", + "col = \"a\"\n", "\n", "%timeit cola_cupy = cp.asarray(df[col])\n", "%timeit cola_cupy = cupy_from_dlpack(df[col].to_dlpack())\n", @@ -1088,14 +1092,16 @@ "metadata": {}, "outputs": [], "source": [ - "def cudf_to_cupy_sparse_matrix(data, sparseformat='column'):\n", - " \"\"\"Converts a cuDF object to a CuPy Sparse Column matrix.\n", - " \"\"\"\n", - " if sparseformat not in ('row', 'column',):\n", + "def cudf_to_cupy_sparse_matrix(data, sparseformat=\"column\"):\n", + " \"\"\"Converts a cuDF object to a CuPy Sparse Column matrix.\"\"\"\n", + " if sparseformat not in (\n", + " \"row\",\n", + " \"column\",\n", + " ):\n", " raise ValueError(\"Let's focus on column and row formats for now.\")\n", - " \n", + "\n", " _sparse_constructor = cp.sparse.csc_matrix\n", - " if sparseformat == 'row':\n", + " if sparseformat == \"row\":\n", " _sparse_constructor = cp.sparse.csr_matrix\n", "\n", " return _sparse_constructor(cupy_from_dlpack(data.to_dlpack()))" @@ -1121,8 +1127,8 @@ "nonzero = 1000\n", "for i in range(20):\n", " arr = cp.random.normal(5, 5, nelem)\n", - " arr[cp.random.choice(arr.shape[0], nelem-nonzero, replace=False)] = 0\n", - " df['a' + str(i)] = arr" + " arr[cp.random.choice(arr.shape[0], nelem - nonzero, replace=False)] = 0\n", + " df[\"a\" + str(i)] = arr" ] }, { diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb index 943fc980a31..ba8c65784d2 100644 --- a/docs/cudf/source/user_guide/guide-to-udfs.ipynb +++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb @@ -15,9 +15,10 @@ "metadata": {}, "outputs": [], "source": [ + "import numpy as np\n", + "\n", "import cudf\n", - "from cudf.datasets import randomdata\n", - "import numpy as np" + "from cudf.datasets import randomdata" ] }, { @@ -375,7 +376,7 @@ "metadata": {}, "outputs": [], "source": [ - "sr = cudf.Series(['', 'abc', 'some_example'])" + "sr = cudf.Series([\"\", \"abc\", \"some_example\"])" ] }, { @@ -387,9 +388,9 @@ "source": [ "def f(st):\n", " if len(st) > 0:\n", - " if st.startswith('a'):\n", + 
" if st.startswith(\"a\"):\n", " return 1\n", - " elif 'example' in st:\n", + " elif \"example\" in st:\n", " return 2\n", " else:\n", " return -1\n", @@ -443,6 +444,7 @@ "outputs": [], "source": [ "from cudf.core.udf.utils import set_malloc_heap_size\n", + "\n", "set_malloc_heap_size(int(2e9))" ] }, @@ -472,7 +474,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = randomdata(nrows=5, dtypes={'a':int, 'b':int, 'c':int}, seed=12)" + "df = randomdata(nrows=5, dtypes={\"a\": int, \"b\": int, \"c\": int}, seed=12)" ] }, { @@ -484,10 +486,11 @@ "source": [ "from numba import cuda\n", "\n", + "\n", "@cuda.jit\n", "def multiply(in_col, out_col, multiplier):\n", " i = cuda.grid(1)\n", - " if i < in_col.size: # boundary guard\n", + " if i < in_col.size: # boundary guard\n", " out_col[i] = in_col[i] * multiplier" ] }, @@ -508,9 +511,9 @@ "metadata": {}, "outputs": [], "source": [ - "size = len(df['a'])\n", - "df['e'] = 0.0\n", - "multiply.forall(size)(df['a'], df['e'], 10.0)" + "size = len(df[\"a\"])\n", + "df[\"e\"] = 0.0\n", + "multiply.forall(size)(df[\"a\"], df[\"e\"], 10.0)" ] }, { @@ -658,7 +661,7 @@ "outputs": [], "source": [ "def f(row):\n", - " return row['A'] + row['B']" + " return row[\"A\"] + row[\"B\"]" ] }, { @@ -733,10 +736,7 @@ } ], "source": [ - "df = cudf.DataFrame({\n", - " 'A': [1,2,3],\n", - " 'B': [4,cudf.NA,6]\n", - "})\n", + "df = cudf.DataFrame({\"A\": [1, 2, 3], \"B\": [4, cudf.NA, 6]})\n", "df" ] }, @@ -881,13 +881,14 @@ ], "source": [ "def f(row):\n", - " x = row['a']\n", + " x = row[\"a\"]\n", " if x is cudf.NA:\n", " return 0\n", " else:\n", " return x + 1\n", "\n", - "df = cudf.DataFrame({'a': [1, cudf.NA, 3]})\n", + "\n", + "df = cudf.DataFrame({\"a\": [1, cudf.NA, 3]})\n", "df" ] }, @@ -988,17 +989,15 @@ ], "source": [ "def f(row):\n", - " x = row['a']\n", - " y = row['b']\n", + " x = row[\"a\"]\n", + " y = row[\"b\"]\n", " if x + y > 3:\n", " return cudf.NA\n", " else:\n", " return x + y\n", "\n", - "df = cudf.DataFrame({\n", - " 'a': [1, 2, 3], \n", - " 'b': [2, 1, 1]\n", - "})\n", + "\n", + "df = cudf.DataFrame({\"a\": [1, 2, 3], \"b\": [2, 1, 1]})\n", "df" ] }, @@ -1099,12 +1098,10 @@ ], "source": [ "def f(row):\n", - " return row['a'] + row['b']\n", + " return row[\"a\"] + row[\"b\"]\n", + "\n", "\n", - "df = cudf.DataFrame({\n", - " 'a': [1, 2, 3], \n", - " 'b': [0.5, cudf.NA, 3.14]\n", - "})\n", + "df = cudf.DataFrame({\"a\": [1, 2, 3], \"b\": [0.5, cudf.NA, 3.14]})\n", "df" ] }, @@ -1214,15 +1211,14 @@ ], "source": [ "def f(row):\n", - " x = row['a']\n", + " x = row[\"a\"]\n", " if x > 3:\n", - " return x\n", + " return x\n", " else:\n", - " return 1.5\n", + " return 1.5\n", + "\n", "\n", - "df = cudf.DataFrame({\n", - " 'a': [1, 3, 5]\n", - "})\n", + "df = cudf.DataFrame({\"a\": [1, 3, 5]})\n", "df" ] }, @@ -1335,15 +1331,18 @@ ], "source": [ "def f(row):\n", - " return row['a'] + (row['b'] - (row['c'] / row['d'])) % row['e']\n", + " return row[\"a\"] + (row[\"b\"] - (row[\"c\"] / row[\"d\"])) % row[\"e\"]\n", "\n", - "df = cudf.DataFrame({\n", - " 'a': [1, 2, 3],\n", - " 'b': [4, 5, 6],\n", - " 'c': [cudf.NA, 4, 4],\n", - " 'd': [8, 7, 8],\n", - " 'e': [7, 1, 6]\n", - "})\n", + "\n", + "df = cudf.DataFrame(\n", + " {\n", + " \"a\": [1, 2, 3],\n", + " \"b\": [4, 5, 6],\n", + " \"c\": [cudf.NA, 4, 4],\n", + " \"d\": [8, 7, 8],\n", + " \"e\": [7, 1, 6],\n", + " }\n", + ")\n", "df" ] }, @@ -1451,10 +1450,9 @@ } ], "source": [ - "str_df = cudf.DataFrame({\n", - " 'str_col': ['abc', 'ABC', 'Example'],\n", - " 'scale': [1, 2, 3]\n", - "})\n", + "str_df = 
cudf.DataFrame(\n", + " {\"str_col\": [\"abc\", \"ABC\", \"Example\"], \"scale\": [1, 2, 3]}\n", + ")\n", "str_df" ] }, @@ -1466,9 +1464,9 @@ "outputs": [], "source": [ "def f(row):\n", - " st = row['str_col']\n", - " scale = row['scale']\n", - " \n", + " st = row[\"str_col\"]\n", + " scale = row[\"scale\"]\n", + "\n", " if len(st) > 5:\n", " return len(st) + scale\n", " else:\n", @@ -1626,11 +1624,12 @@ } ], "source": [ - "df = df.apply_rows(conditional_add, \n", - " incols={'a':'x', 'e':'y'},\n", - " outcols={'out': np.float64},\n", - " kwargs={}\n", - " )\n", + "df = df.apply_rows(\n", + " conditional_add,\n", + " incols={\"a\": \"x\", \"e\": \"y\"},\n", + " outcols={\"out\": np.float64},\n", + " kwargs={},\n", + ")\n", "df.head()" ] }, @@ -1738,10 +1737,11 @@ " for i, (x, y) in enumerate(zip(a, b)):\n", " out[i] = x + y\n", "\n", - "df = randomdata(nrows=5, dtypes={'a':int, 'b':int, 'c':int}, seed=12)\n", - "df.loc[2, 'a'] = None\n", - "df.loc[3, 'b'] = None\n", - "df.loc[1, 'c'] = None\n", + "\n", + "df = randomdata(nrows=5, dtypes={\"a\": int, \"b\": int, \"c\": int}, seed=12)\n", + "df.loc[2, \"a\"] = None\n", + "df.loc[3, \"b\"] = None\n", + "df.loc[1, \"c\"] = None\n", "df.head()" ] }, @@ -1841,10 +1841,9 @@ } ], "source": [ - "df = df.apply_rows(gpu_add, \n", - " incols=['a', 'b'],\n", - " outcols={'out':np.float64},\n", - " kwargs={})\n", + "df = df.apply_rows(\n", + " gpu_add, incols=[\"a\", \"b\"], outcols={\"out\": np.float64}, kwargs={}\n", + ")\n", "df.head()" ] }, @@ -1892,7 +1891,7 @@ } ], "source": [ - "ser = cudf.Series([16, 25, 36, 49, 64, 81], dtype='float64')\n", + "ser = cudf.Series([16, 25, 36, 49, 64, 81], dtype=\"float64\")\n", "ser" ] }, @@ -1935,12 +1934,13 @@ "source": [ "import math\n", "\n", + "\n", "def example_func(window):\n", " b = 0\n", " for a in window:\n", " b = max(b, math.sqrt(a))\n", " if b == 8:\n", - " return 100 \n", + " return 100\n", " return b" ] }, @@ -2064,8 +2064,8 @@ ], "source": [ "df2 = cudf.DataFrame()\n", - "df2['a'] = np.arange(55, 65, dtype='float64')\n", - "df2['b'] = np.arange(55, 65, dtype='float64')\n", + "df2[\"a\"] = np.arange(55, 65, dtype=\"float64\")\n", + "df2[\"b\"] = np.arange(55, 65, dtype=\"float64\")\n", "df2.head()" ] }, @@ -2279,7 +2279,9 @@ } ], "source": [ - "df = randomdata(nrows=10, dtypes={'a':float, 'b':bool, 'c':str, 'e': float}, seed=12)\n", + "df = randomdata(\n", + " nrows=10, dtypes={\"a\": float, \"b\": bool, \"c\": str, \"e\": float}, seed=12\n", + ")\n", "df.head()" ] }, @@ -2290,7 +2292,7 @@ "metadata": {}, "outputs": [], "source": [ - "grouped = df.groupby(['b'])" + "grouped = df.groupby([\"b\"])" ] }, { @@ -2469,9 +2471,9 @@ } ], "source": [ - "results = grouped.apply_grouped(rolling_avg,\n", - " incols=['e'],\n", - " outcols=dict(rolling_avg_e=np.float64))\n", + "results = grouped.apply_grouped(\n", + " rolling_avg, incols=[\"e\"], outcols=dict(rolling_avg_e=np.float64)\n", + ")\n", "results" ] }, @@ -2554,8 +2556,9 @@ " i = cuda.grid(1)\n", " if i < x.size:\n", " out[i] = x[i] * 5\n", - " \n", - "out = cudf.Series(cp.zeros(len(s), dtype='int32'))\n", + "\n", + "\n", + "out = cudf.Series(cp.zeros(len(s), dtype=\"int32\"))\n", "multiply_by_5.forall(s.shape[0])(s, out)\n", "out" ] diff --git a/docs/cudf/source/user_guide/missing-data.ipynb b/docs/cudf/source/user_guide/missing-data.ipynb index ac5bddd34cf..f1404ce0b77 100644 --- a/docs/cudf/source/user_guide/missing-data.ipynb +++ b/docs/cudf/source/user_guide/missing-data.ipynb @@ -39,8 +39,9 @@ "metadata": {}, "outputs": [], "source": [ - "import 
cudf\n", - "import numpy as np" + "import numpy as np\n", + "\n", + "import cudf" ] }, { @@ -50,7 +51,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = cudf.DataFrame({'a': [1, 2, None, 4], 'b':[0.1, None, 2.3, 17.17]})" + "df = cudf.DataFrame({\"a\": [1, 2, None, 4], \"b\": [0.1, None, 2.3, 17.17]})" ] }, { @@ -221,7 +222,7 @@ } ], "source": [ - "df['a'].notna()" + "df[\"a\"].notna()" ] }, { @@ -304,7 +305,7 @@ } ], "source": [ - "df['b'] == np.nan" + "df[\"b\"] == np.nan" ] }, { @@ -535,7 +536,10 @@ ], "source": [ "import pandas as pd\n", - "datetime_series = cudf.Series([pd.Timestamp(\"20120101\"), pd.NaT, pd.Timestamp(\"20120101\")])\n", + "\n", + "datetime_series = cudf.Series(\n", + " [pd.Timestamp(\"20120101\"), pd.NaT, pd.Timestamp(\"20120101\")]\n", + ")\n", "datetime_series" ] }, @@ -618,7 +622,12 @@ "metadata": {}, "outputs": [], "source": [ - "df1 = cudf.DataFrame({'a':[1, None, 2, 3, None], 'b':cudf.Series([np.nan, 2, 3.2, 0.1, 1], nan_as_null=False)})" + "df1 = cudf.DataFrame(\n", + " {\n", + " \"a\": [1, None, 2, 3, None],\n", + " \"b\": cudf.Series([np.nan, 2, 3.2, 0.1, 1], nan_as_null=False),\n", + " }\n", + ")" ] }, { @@ -628,7 +637,9 @@ "metadata": {}, "outputs": [], "source": [ - "df2 = cudf.DataFrame({'a':[1, 11, 2, 34, 10], 'b':cudf.Series([0.23, 22, 3.2, None, 1])})" + "df2 = cudf.DataFrame(\n", + " {\"a\": [1, 11, 2, 34, 10], \"b\": cudf.Series([0.23, 22, 3.2, None, 1])}\n", + ")" ] }, { @@ -899,7 +910,7 @@ } ], "source": [ - "df1['a']" + "df1[\"a\"]" ] }, { @@ -920,7 +931,7 @@ } ], "source": [ - "df1['a'].sum()" + "df1[\"a\"].sum()" ] }, { @@ -949,7 +960,7 @@ } ], "source": [ - "df1['a'].mean()" + "df1[\"a\"].mean()" ] }, { @@ -980,7 +991,7 @@ } ], "source": [ - "df1['a'].sum(skipna=False)" + "df1[\"a\"].sum(skipna=False)" ] }, { @@ -1001,7 +1012,7 @@ } ], "source": [ - "df1['a'].mean(skipna=False)" + "df1[\"a\"].mean(skipna=False)" ] }, { @@ -1035,7 +1046,7 @@ } ], "source": [ - "df1['a'].cumsum()" + "df1[\"a\"].cumsum()" ] }, { @@ -1069,7 +1080,7 @@ } ], "source": [ - "df1['a'].cumsum(skipna=False)" + "df1[\"a\"].cumsum(skipna=False)" ] }, { @@ -1148,7 +1159,7 @@ } ], "source": [ - "cudf.Series([], dtype='float64').sum()" + "cudf.Series([], dtype=\"float64\").sum()" ] }, { @@ -1219,7 +1230,7 @@ } ], "source": [ - "cudf.Series([], dtype='float64').prod()" + "cudf.Series([], dtype=\"float64\").prod()" ] }, { @@ -1382,7 +1393,7 @@ } ], "source": [ - "df1.groupby('a').mean()" + "df1.groupby(\"a\").mean()" ] }, { @@ -1463,7 +1474,7 @@ } ], "source": [ - "df1.groupby('a', dropna=False).mean()" + "df1.groupby(\"a\", dropna=False).mean()" ] }, { @@ -1670,7 +1681,7 @@ } ], "source": [ - "df1['b'].fillna(10)" + "df1[\"b\"].fillna(10)" ] }, { @@ -1697,7 +1708,8 @@ "outputs": [], "source": [ "import cupy as cp\n", - "dff = cudf.DataFrame(cp.random.randn(10, 3), columns=list('ABC'))" + "\n", + "dff = cudf.DataFrame(cp.random.randn(10, 3), columns=list(\"ABC\"))" ] }, { @@ -2339,7 +2351,7 @@ } ], "source": [ - "df1['a'].dropna()" + "df1[\"a\"].dropna()" ] }, { From d14d980b63402a779a3f75cc64cb3a5a0be7898d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 24 Feb 2023 15:30:07 -0800 Subject: [PATCH 16/60] Add dfg as a pre-commit hook (#12819) This change allows local and remote runs to handle calls to dfg identically, and removes the need for a separate CI check. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/12819 --- .github/workflows/pr.yaml | 2 ++ .pre-commit-config.yaml | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 952b58abda5..3a80139e333 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -30,6 +30,8 @@ jobs: checks: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04 + with: + enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a030f3bd25b..1eb2c508db9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -160,6 +160,11 @@ repos: .*test.*| ^CHANGELOG.md$ ) + - repo: https://github.com/rapidsai/dependency-file-generator + rev: v1.4.0 + hooks: + - id: rapids-dependency-file-generator + args: ["--clean"] default_language_version: python: python3 From eb4da9345f172c3911f78c5e851757ec2ec222b9 Mon Sep 17 00:00:00 2001 From: Carl Simon Adorf Date: Sat, 25 Feb 2023 01:13:34 +0100 Subject: [PATCH 17/60] CI: Remove specification of manual stage for check_style.sh script. (#12803) Do not explicitly specify to run the "manual" stage when running pre-commits as part of the ci/check_style.sh script. Authors: - Carl Simon Adorf (https://github.com/csadorf) - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/12803 --- ci/check_style.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/check_style.sh b/ci/check_style.sh index 020143095ce..f9bfea7b47c 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. set -euo pipefail @@ -20,4 +20,4 @@ mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE}) wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL} # Run pre-commit checks -pre-commit run --hook-stage manual --all-files --show-diff-on-failure +pre-commit run --all-files --show-diff-on-failure From 173459e99fa8fce4202e2613f1f2c89a016cd350 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 24 Feb 2023 21:12:33 -0800 Subject: [PATCH 18/60] Replace message parsing with throwing more specific exceptions (#12426) This PR identifies places where cuDF Python is parsing exception messages from libcudf in order to throw an appropriate Python exception and modifies the associated libcudf error macros (`CUDF_EXPECTS` or `CUDF_FAIL`) to throw a more descriptive exception. In order to fully support this behavior with arbitrary libcudf exceptions, this PR also introduces a custom Cython exception handler that can be extended to catch and handle any new type of exception thrown by libcudf. To cases where issues with the dtype of an argument is treated as a TypeError rather than a ValueError in pandas, a new exception type `dtype_error : public logic_error` is defined and mapped to a TypeError in Python. This PR does not aim to exhaustively improve the choice of thrown exceptions throughout libcudf, only in places that are immediately relevant to removing message parsing in cudf Python. 
Resolves #10632 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) - Ashwin Srinath (https://github.com/shwina) - Matthew Roeschke (https://github.com/mroeschke) - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12426 --- cpp/include/cudf/binaryop.hpp | 5 +- cpp/include/cudf/concatenate.hpp | 10 +-- cpp/include/cudf/lists/combine.hpp | 8 +- cpp/include/cudf/lists/contains.hpp | 9 +-- cpp/include/cudf/lists/gather.hpp | 4 +- cpp/include/cudf/strings/json.hpp | 4 +- cpp/include/cudf/utilities/error.hpp | 25 +++++- cpp/src/binaryop/binaryop.cpp | 2 +- cpp/src/binaryop/compiled/equality_ops.cu | 5 +- .../binaryop/compiled/struct_binary_ops.cuh | 3 +- cpp/src/copying/concatenate.cu | 11 ++- .../combine/concatenate_list_elements.cu | 8 +- cpp/src/lists/contains.cu | 3 +- cpp/src/lists/copying/segmented_gather.cu | 4 +- cpp/src/strings/json/json_path.cu | 7 +- .../binop-compiled-fixed_point-test.cpp | 2 +- cpp/tests/copying/concatenate_tests.cu | 25 +++--- .../copying/segmented_gather_list_tests.cpp | 6 +- .../concatenate_list_elements_tests.cpp | 8 +- cpp/tests/lists/contains_tests.cpp | 33 +++++--- cpp/tests/strings/json_tests.cpp | 10 ++- .../developer_guide/contributing_guide.md | 16 ++-- java/src/main/native/include/jni_utils.hpp | 6 +- python/cudf/cudf/_lib/CMakeLists.txt | 5 ++ python/cudf/cudf/_lib/cpp/copying.pxd | 5 +- python/cudf/cudf/_lib/cpp/lists/contains.pxd | 9 ++- python/cudf/cudf/_lib/exception_handler.hpp | 80 +++++++++++++++++++ python/cudf/cudf/_lib/exception_handler.pxd | 5 ++ python/cudf/cudf/core/column/column.py | 17 +--- python/cudf/cudf/core/column/decimal.py | 48 +++++------ python/cudf/cudf/core/column/lists.py | 67 ++++------------ python/cudf/cudf/core/column/string.py | 33 +++----- python/cudf/cudf/tests/test_column.py | 5 +- 33 files changed, 284 insertions(+), 204 deletions(-) create mode 100644 python/cudf/cudf/_lib/exception_handler.hpp create mode 100644 python/cudf/cudf/_lib/exception_handler.pxd diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 6371cb6c82b..77d6a4d1e89 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -108,6 +108,7 @@ enum class binary_operator : int32_t { * @throw cudf::logic_error if @p output_type dtype isn't fixed-width * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical * operations. + * @throw cudf::data_type_error if the operation is not supported for the types of @p lhs and @p rhs */ std::unique_ptr binary_operation( scalar const& lhs, @@ -136,6 +137,7 @@ std::unique_ptr binary_operation( * @throw cudf::logic_error if @p output_type dtype isn't fixed-width * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical * operations. + * @throw cudf::data_type_error if the operation is not supported for the types of @p lhs and @p rhs */ std::unique_ptr binary_operation( column_view const& lhs, @@ -163,6 +165,7 @@ std::unique_ptr binary_operation( * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical * operations. 
* @throw cudf::logic_error if @p output_type dtype isn't fixed-width + * @throw cudf::data_type_error if the operation is not supported for the types of @p lhs and @p rhs */ std::unique_ptr binary_operation( column_view const& lhs, diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp index 1f8ce65ad93..b20c97b3c31 100644 --- a/cpp/include/cudf/concatenate.hpp +++ b/cpp/include/cudf/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,8 +50,8 @@ rmm::device_buffer concatenate_masks( /** * @brief Concatenates multiple columns into a single column. * - * @throws cudf::logic_error - * If types of the input columns mismatch + * @throws cudf::logic_error If types of the input columns mismatch + * @throws std::overflow_error If the the total number of output rows exceeds cudf::size_type * * @param columns_to_concat host_span of column views to be concatenated into a single column * @param mr Device memory resource used to allocate the returned column's device memory @@ -80,8 +80,8 @@ std::unique_ptr concatenate( * column_view tc1 = (t->view()).column(1); //Contains {0,1,2,3,4,5,6,7} * ``` * - * @throws cudf::logic_error - * If number of columns mismatch + * @throws cudf::logic_error If number of columns mismatch + * @throws std::overflow_error If the the total number of output rows exceeds cudf::size_type * * @param tables_to_concat host_span of table views to be concatenated into a single table * @param mr Device memory resource used to allocate the returned table's device memory diff --git a/cpp/include/cudf/lists/combine.hpp b/cpp/include/cudf/lists/combine.hpp index 4f211e87cc7..531396e940e 100644 --- a/cpp/include/cudf/lists/combine.hpp +++ b/cpp/include/cudf/lists/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -80,10 +80,10 @@ std::unique_ptr concatenate_rows( * r is [ {1, 2, 3, 4, 5}, {6, 7, 8, 9} ] * @endcode * - * @throws cudf::logic_error if the input column is not at least two-level depth lists column (i.e., - * each row must be a list of list). + * @throws std::invalid_argument if the input column is not at least two-level depth lists column + * (i.e., each row must be a list of list). * @throws cudf::logic_error if the input lists column contains nested typed entries that are not - * lists. + * lists. * * @param input The lists column containing lists of list elements to concatenate. * @param null_policy The parameter to specify whether a null list element will be ignored from diff --git a/cpp/include/cudf/lists/contains.hpp b/cpp/include/cudf/lists/contains.hpp index d2b4d59dfba..a9f06bf399c 100644 --- a/cpp/include/cudf/lists/contains.hpp +++ b/cpp/include/cudf/lists/contains.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -126,9 +126,7 @@ enum class duplicate_find_option : int32_t { * @param mr Device memory resource used to allocate the returned column's device memory. 
* @return std::unique_ptr INT32 column of `n` rows with the location of the `search_key` * - * @throw cudf::logic_error If `search_key` type does not match the element type in `lists` - * @throw cudf::logic_error If `search_key` is of a nested type, or `lists` contains nested - * elements (LIST, STRUCT) + * @throw cudf::data_type_error If `search_keys` type does not match the element type in `lists` */ std::unique_ptr index_of( cudf::lists_column_view const& lists, @@ -163,8 +161,7 @@ std::unique_ptr index_of( * @return std::unique_ptr INT32 column of `n` rows with the location of the `search_key` * * @throw cudf::logic_error If `search_keys` does not match `lists` in its number of rows - * @throw cudf::logic_error If `search_keys` type does not match the element type in `lists` - * @throw cudf::logic_error If `lists` or `search_keys` contains nested elements (LIST, STRUCT) + * @throw cudf::data_type_error If `search_keys` type does not match the element type in `lists` */ std::unique_ptr index_of( cudf::lists_column_view const& lists, diff --git a/cpp/include/cudf/lists/gather.hpp b/cpp/include/cudf/lists/gather.hpp index f91ce29a7cb..38bed9ede43 100644 --- a/cpp/include/cudf/lists/gather.hpp +++ b/cpp/include/cudf/lists/gather.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ namespace lists { * @endcode * * @throws cudf::logic_error if `gather_map_list` size is not same as `source_column` size. - * @throws cudf::logic_error if gather_map contains null values. + * @throws std::invalid_argument if gather_map contains null values. * @throws cudf::logic_error if gather_map is not list column of an index type. * * If indices in `gather_map_list` are outside the range `[-n, n)`, where `n` is the number of diff --git a/cpp/include/cudf/strings/json.hpp b/cpp/include/cudf/strings/json.hpp index 11e8daa9855..8fabee6b9a5 100644 --- a/cpp/include/cudf/strings/json.hpp +++ b/cpp/include/cudf/strings/json.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -160,6 +160,8 @@ class get_json_object_options { * @param options Options for controlling the behavior of the function * @param mr Resource for allocating device memory. * @return New strings column containing the retrieved json object strings + * + * @throw std::invalid_argument if provided an invalid operator or an empty name */ std::unique_ptr get_json_object( cudf::strings_column_view const& col, diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp index 38ca0f2651e..f70ef4e5f07 100644 --- a/cpp/include/cudf/utilities/error.hpp +++ b/cpp/include/cudf/utilities/error.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -84,6 +84,29 @@ struct cuda_error : public std::runtime_error { struct fatal_cuda_error : public cuda_error { using cuda_error::cuda_error; // Inherit constructors }; + +/** + * @brief Exception thrown when an operation is attempted on an unsupported dtype. 
+ * + * This exception should be thrown when an operation is attempted on an + * unsupported data_type. This exception should not be thrown directly and is + * instead thrown by the CUDF_EXPECTS or CUDF_FAIL macros. + */ +struct data_type_error : public std::invalid_argument { + /** + * @brief Constructs a data_type_error with the error message. + * + * @param message Message to be associated with the exception + */ + data_type_error(char const* const message) : std::invalid_argument(message) {} + + /** + * @brief Construct a new data_type_error object with error message + * + * @param message Message to be associated with the exception + */ + data_type_error(std::string const& message) : std::invalid_argument(message) {} +}; /** @} */ } // namespace cudf diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index b23c1fc9fe1..f81f0dcc311 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -203,7 +203,7 @@ std::unique_ptr binary_operation(LhsType const& lhs, return cudf::binops::compiled::string_null_min_max(lhs, rhs, op, output_type, stream, mr); if (not cudf::binops::compiled::is_supported_operation(output_type, lhs.type(), rhs.type(), op)) - CUDF_FAIL("Unsupported operator for these types"); + CUDF_FAIL("Unsupported operator for these types", cudf::data_type_error); if (cudf::is_fixed_point(lhs.type()) or cudf::is_fixed_point(rhs.type())) { cudf::binops::compiled::fixed_point_binary_operation_validation( diff --git a/cpp/src/binaryop/compiled/equality_ops.cu b/cpp/src/binaryop/compiled/equality_ops.cu index 61f02252a26..041fca76494 100644 --- a/cpp/src/binaryop/compiled/equality_ops.cu +++ b/cpp/src/binaryop/compiled/equality_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,8 @@ void dispatch_equality_op(mutable_column_view& out, rmm::cuda_stream_view stream) { CUDF_EXPECTS(op == binary_operator::EQUAL || op == binary_operator::NOT_EQUAL, - "Unsupported operator for these types"); + "Unsupported operator for these types", + cudf::data_type_error); auto common_dtype = get_common_type(out.type(), lhs.type(), rhs.type()); auto outd = mutable_column_device_view::create(out, stream); auto lhsd = column_device_view::create(lhs, stream); diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index d167f0fe3c5..8418493318f 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -149,7 +149,8 @@ void apply_struct_equality_op(mutable_column_view& out, { CUDF_EXPECTS(op == binary_operator::EQUAL || op == binary_operator::NOT_EQUAL || op == binary_operator::NULL_EQUALS, - "Unsupported operator for these types"); + "Unsupported operator for these types", + cudf::data_type_error); auto tlhs = table_view{{lhs}}; auto trhs = table_view{{rhs}}; diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 577d6427b19..5d36d70696c 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -228,7 +228,8 @@ std::unique_ptr fused_concatenate(host_span views, auto const output_size = std::get<3>(device_views); CUDF_EXPECTS(output_size <= static_cast(std::numeric_limits::max()), - "Total number of concatenated rows exceeds size_type range"); + "Total number of concatenated rows exceeds size_type range", + std::overflow_error); // Allocate output auto const policy = has_nulls ? mask_policy::ALWAYS : mask_policy::NEVER; @@ -398,7 +399,8 @@ void traverse_children::operator()(host_span(std::numeric_limits::max()), - "Total number of concatenated chars exceeds size_type range"); + "Total number of concatenated chars exceeds size_type range", + std::overflow_error); } template <> @@ -469,7 +471,8 @@ void bounds_and_type_check(host_span cols, rmm::cuda_stream_v }); // note: output text must include "exceeds size_type range" for python error handling CUDF_EXPECTS(total_row_count <= static_cast(std::numeric_limits::max()), - "Total number of concatenated rows exceeds size_type range"); + "Total number of concatenated rows exceeds size_type range", + std::overflow_error); // traverse children cudf::type_dispatcher(cols.front().type(), traverse_children{}, cols, stream); diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu index 496d9ee670a..257b0aed82f 100644 --- a/cpp/src/lists/combine/concatenate_list_elements.cu +++ b/cpp/src/lists/combine/concatenate_list_elements.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -257,11 +257,13 @@ std::unique_ptr concatenate_list_elements(column_view const& input, rmm::mr::device_memory_resource* mr) { auto type = input.type(); // Column that is lists of lists. - CUDF_EXPECTS(type.id() == type_id::LIST, "Input column must be a lists column."); + CUDF_EXPECTS( + type.id() == type_id::LIST, "Input column must be a lists column.", std::invalid_argument); auto col = lists_column_view(input).child(); // Rows, which are lists. type = col.type(); - CUDF_EXPECTS(type.id() == type_id::LIST, "Rows of the input column must be lists."); + CUDF_EXPECTS( + type.id() == type_id::LIST, "Rows of the input column must be lists.", std::invalid_argument); col = lists_column_view(col).child(); // The last level entries what we need to check. type = col.type(); diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 05fe82d1713..a3293e36825 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -309,7 +309,8 @@ struct dispatch_index_of { auto const child = lists.child(); CUDF_EXPECTS(child.type() == search_keys.type(), - "Type/Scale of search key does not match list column element type."); + "Type/Scale of search key does not match list column element type.", + cudf::data_type_error); CUDF_EXPECTS(search_keys.type().id() != type_id::EMPTY, "Type cannot be empty."); auto constexpr search_key_is_scalar = std::is_same_v; diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index 2c12e09bcd9..79d33e7c17d 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, { CUDF_EXPECTS(is_index_type(gather_map.child().type()), "Gather map should be list column of index type"); - CUDF_EXPECTS(!gather_map.has_nulls(), "Gather map contains nulls"); + CUDF_EXPECTS(!gather_map.has_nulls(), "Gather map contains nulls", std::invalid_argument); CUDF_EXPECTS(value_column.size() == gather_map.size(), "Gather map and list column should be same size"); diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index afe16518036..c6ea47ec0f3 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -588,7 +588,7 @@ class path_state : private parser { return path_operator{path_operator_type::CHILD_WILDCARD}; } break; - default: CUDF_FAIL("Unrecognized JSONPath operator"); break; + default: CUDF_FAIL("Unrecognized JSONPath operator", std::invalid_argument); break; } return {path_operator_type::ERROR}; } @@ -624,7 +624,8 @@ class path_state : private parser { } // an empty name is not valid - CUDF_EXPECTS(name.size_bytes() > 0, "Invalid empty name in JSONPath query string"); + CUDF_EXPECTS( + name.size_bytes() > 0, "Invalid empty name in JSONPath query string", std::invalid_argument); return true; } diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp index 8170fe4f490..bdd0003b86b 100644 --- a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp @@ -678,7 +678,7 @@ TYPED_TEST(FixedPointCompiledTest, FixedPointBinaryOpThrows) auto const col = fp_wrapper{{100, 300, 500, 700}, scale_type{-2}}; auto const non_bool_type = cudf::data_type{cudf::type_to_id(), -2}; EXPECT_THROW(cudf::binary_operation(col, col, cudf::binary_operator::LESS, non_bool_type), - cudf::logic_error); + cudf::data_type_error); } TYPED_TEST(FixedPointCompiledTest, FixedPointBinaryOpModSimple) diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index 79ec2293455..ca343b963d7 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -36,6 +36,7 @@ #include #include +#include #include template @@ -369,7 +370,7 @@ TEST_F(OverflowTest, OverflowTest) cudf::table_view tbl({*many_chars}); EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), - cudf::logic_error); + std::overflow_error); } // string column, overflow on chars @@ -384,7 +385,7 @@ TEST_F(OverflowTest, OverflowTest) cudf::table_view tbl({*col}); EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), - cudf::logic_error); + std::overflow_error); } // string column, overflow on offsets (rows) @@ -400,7 +401,7 @@ TEST_F(OverflowTest, OverflowTest) cudf::table_view tbl({*col}); EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), - cudf::logic_error); + std::overflow_error); } // list, structs too long @@ -425,7 +426,7 @@ TEST_F(OverflowTest, OverflowTest) cudf::table_view tbl({*col}); auto tables = std::vector({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, 
tbl, tbl, tbl, tbl}); - EXPECT_THROW(cudf::concatenate(tables), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(tables), std::overflow_error); } // struct, list child too long @@ -450,7 +451,7 @@ TEST_F(OverflowTest, OverflowTest) cudf::table_view tbl({*col}); auto tables = std::vector({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl}); - EXPECT_THROW(cudf::concatenate(tables), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(tables), std::overflow_error); } } @@ -470,7 +471,8 @@ TEST_F(OverflowTest, Presliced) // 513 * 1024 * 1024, should fail cudf::table_view b({sliced[1]}); - EXPECT_THROW(cudf::concatenate(std::vector({b, b, b, b})), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({b, b, b, b})), + std::overflow_error); } // struct column @@ -490,7 +492,8 @@ TEST_F(OverflowTest, Presliced) // 513 * 1024 * 1024, should fail cudf::table_view b({sliced[1]}); - EXPECT_THROW(cudf::concatenate(std::vector({b, b, b, b})), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({b, b, b, b})), + std::overflow_error); } // strings, overflow on chars @@ -516,7 +519,8 @@ TEST_F(OverflowTest, Presliced) // (num_rows / 2) + 1 should fail cudf::table_view b({sliced[1]}); - EXPECT_THROW(cudf::concatenate(std::vector({b, b, b, b})), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({b, b, b, b})), + std::overflow_error); } // strings, overflow on offsets @@ -589,7 +593,7 @@ TEST_F(OverflowTest, Presliced) auto sliced = cudf::split(*col, {2}); cudf::table_view tbl({sliced[1]}); auto tables = std::vector({tbl, tbl, tbl, tbl}); - EXPECT_THROW(cudf::concatenate(tables), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(tables), std::overflow_error); } // list, overflow on offsets @@ -674,7 +678,8 @@ TEST_F(OverflowTest, Presliced) cudf::concatenate(std::vector({a, a, a, a})); cudf::table_view b({sliced[1]}); - EXPECT_THROW(cudf::concatenate(std::vector({b, b, b, b})), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({b, b, b, b})), + std::overflow_error); } } diff --git a/cpp/tests/copying/segmented_gather_list_tests.cpp b/cpp/tests/copying/segmented_gather_list_tests.cpp index deeebc641c2..fc21af2087b 100644 --- a/cpp/tests/copying/segmented_gather_list_tests.cpp +++ b/cpp/tests/copying/segmented_gather_list_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,6 +26,8 @@ #include #include +#include + template class SegmentedGatherTest : public cudf::test::BaseFixture { }; @@ -611,7 +613,7 @@ TEST_F(SegmentedGatherTestFloat, Fails) // Nulls are not supported in the gather map. EXPECT_THROW(cudf::lists::segmented_gather(cudf::lists_column_view{list}, cudf::lists_column_view{nulls_map}), - cudf::logic_error); + std::invalid_argument); // Gather map and list column sizes must be the same. EXPECT_THROW(cudf::lists::segmented_gather(cudf::lists_column_view{list}, diff --git a/cpp/tests/lists/combine/concatenate_list_elements_tests.cpp b/cpp/tests/lists/combine/concatenate_list_elements_tests.cpp index ca25560141c..77e8f904d01 100644 --- a/cpp/tests/lists/combine/concatenate_list_elements_tests.cpp +++ b/cpp/tests/lists/combine/concatenate_list_elements_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,8 @@ #include +#include + using namespace cudf::test::iterators; namespace { @@ -47,13 +49,13 @@ TEST_F(ConcatenateListElementsTest, InvalidInput) // Input lists is not a 2-level depth lists column. { auto const col = IntCol{}; - EXPECT_THROW(cudf::lists::concatenate_list_elements(col), cudf::logic_error); + EXPECT_THROW(cudf::lists::concatenate_list_elements(col), std::invalid_argument); } // Input lists is not at least 2-level depth lists column. { auto const col = IntListsCol{1, 2, 3}; - EXPECT_THROW(cudf::lists::concatenate_list_elements(col), cudf::logic_error); + EXPECT_THROW(cudf::lists::concatenate_list_elements(col), std::invalid_argument); } } diff --git a/cpp/tests/lists/contains_tests.cpp b/cpp/tests/lists/contains_tests.cpp index 2139103500a..f592819dacb 100644 --- a/cpp/tests/lists/contains_tests.cpp +++ b/cpp/tests/lists/contains_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -503,10 +503,11 @@ TEST_F(ContainsTest, ScalarTypeRelatedExceptions) {{1, 2, 3}, {4, 5, 6}}}.release(); auto skey = create_scalar_search_key(10); - EXPECT_THROW(cudf::lists::contains(list_of_lists->view(), *skey), cudf::logic_error); + EXPECT_THROW(cudf::lists::contains(list_of_lists->view(), *skey), cudf::data_type_error); EXPECT_THROW(cudf::lists::index_of(list_of_lists->view(), *skey, FIND_FIRST), - cudf::logic_error); - EXPECT_THROW(cudf::lists::index_of(list_of_lists->view(), *skey, FIND_LAST), cudf::logic_error); + cudf::data_type_error); + EXPECT_THROW(cudf::lists::index_of(list_of_lists->view(), *skey, FIND_LAST), + cudf::data_type_error); } { // Search key must match list elements in type. @@ -517,9 +518,11 @@ TEST_F(ContainsTest, ScalarTypeRelatedExceptions) } .release(); auto skey = create_scalar_search_key("Hello, World!"); - EXPECT_THROW(cudf::lists::contains(list_of_ints->view(), *skey), cudf::logic_error); - EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), *skey, FIND_FIRST), cudf::logic_error); - EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), *skey, FIND_LAST), cudf::logic_error); + EXPECT_THROW(cudf::lists::contains(list_of_ints->view(), *skey), cudf::data_type_error); + EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), *skey, FIND_FIRST), + cudf::data_type_error); + EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), *skey, FIND_LAST), + cudf::data_type_error); } } @@ -813,9 +816,11 @@ TEST_F(ContainsTest, VectorTypeRelatedExceptions) {{1, 2, 3}, {4, 5, 6}}}.release(); auto skey = cudf::test::fixed_width_column_wrapper{0, 1, 2}; - EXPECT_THROW(cudf::lists::contains(list_of_lists->view(), skey), cudf::logic_error); - EXPECT_THROW(cudf::lists::index_of(list_of_lists->view(), skey, FIND_FIRST), cudf::logic_error); - EXPECT_THROW(cudf::lists::index_of(list_of_lists->view(), skey, FIND_LAST), cudf::logic_error); + EXPECT_THROW(cudf::lists::contains(list_of_lists->view(), skey), cudf::data_type_error); + EXPECT_THROW(cudf::lists::index_of(list_of_lists->view(), skey, FIND_FIRST), + cudf::data_type_error); + EXPECT_THROW(cudf::lists::index_of(list_of_lists->view(), skey, FIND_LAST), + cudf::data_type_error); } { // Search key must match list elements in type. 
@@ -826,9 +831,11 @@ TEST_F(ContainsTest, VectorTypeRelatedExceptions) } .release(); auto skey = cudf::test::strings_column_wrapper{"Hello", "World"}; - EXPECT_THROW(cudf::lists::contains(list_of_ints->view(), skey), cudf::logic_error); - EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), skey, FIND_FIRST), cudf::logic_error); - EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), skey, FIND_LAST), cudf::logic_error); + EXPECT_THROW(cudf::lists::contains(list_of_ints->view(), skey), cudf::data_type_error); + EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), skey, FIND_FIRST), + cudf::data_type_error); + EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), skey, FIND_LAST), + cudf::data_type_error); } { // Search key column size must match lists column size. diff --git a/cpp/tests/strings/json_tests.cpp b/cpp/tests/strings/json_tests.cpp index 1924d809743..4a485de2f2a 100644 --- a/cpp/tests/strings/json_tests.cpp +++ b/cpp/tests/strings/json_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,8 @@ #include #include +#include + // reference: https://jsonpath.herokuapp.com/ // clang-format off @@ -566,7 +568,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) auto query = [&]() { auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); }; - EXPECT_THROW(query(), cudf::logic_error); + EXPECT_THROW(query(), std::invalid_argument); } { @@ -575,7 +577,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) auto query = [&]() { auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); }; - EXPECT_THROW(query(), cudf::logic_error); + EXPECT_THROW(query(), std::invalid_argument); } { @@ -584,7 +586,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) auto query = [&]() { auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); }; - EXPECT_THROW(query(), cudf::logic_error); + EXPECT_THROW(query(), std::invalid_argument); } } diff --git a/docs/cudf/source/developer_guide/contributing_guide.md b/docs/cudf/source/developer_guide/contributing_guide.md index bb3479cf4c1..b5ea9519842 100644 --- a/docs/cudf/source/developer_guide/contributing_guide.md +++ b/docs/cudf/source/developer_guide/contributing_guide.md @@ -123,19 +123,13 @@ There is no need to mention when the argument will be supported in the future. ### Handling libcudf Exceptions -Currently libcudf raises `cudf::logic_error` and `cudf::cuda_error`. -These error types are mapped to `RuntimeError` in python. -Several APIs use the exception payload `what()` message to determine the exception type raised by libcudf. - -Determining error type based on exception payload is brittle since libcudf does not maintain API stability on exception messages. -This is a compromise due to libcudf only raising a limited number of error types. -Only adopt this strategy when necessary. - -The projected roadmap is to diversify the exception types raised by libcudf. Standard C++ natively supports various [exception types](https://en.cppreference.com/w/cpp/error/exception), which Cython maps to [these Python exception types](https://docs.cython.org/en/latest/src/userguide/wrapping_CPlusPlus.html#exceptions). -In the future, libcudf may employ custom C++ exception types. 
-If that occurs, this section will be updated to reflect how these may be mapped to desired Python exception types. +In addition to built-in exceptions, libcudf also raises a few additional types of exceptions. +cuDF extends Cython's default mapping to account for these exception types. +When a new libcudf exception type is added, a suitable except clause should be added to cuDF's +[exception handler](https://github.com/rapidsai/cudf/blob/main/python/cudf/cudf/_lib/cpp/exception_handler.hpp). +If no built-in Python exception seems like a good match, a new Python exception should be created. ### Raising warnings diff --git a/java/src/main/native/include/jni_utils.hpp b/java/src/main/native/include/jni_utils.hpp index 78239b86ae2..ee2325cc76f 100644 --- a/java/src/main/native/include/jni_utils.hpp +++ b/java/src/main/native/include/jni_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,6 +32,7 @@ constexpr jint MINIMUM_JNI_VERSION = JNI_VERSION_1_6; constexpr char const *CUDA_ERROR_CLASS = "ai/rapids/cudf/CudaException"; constexpr char const *CUDA_FATAL_ERROR_CLASS = "ai/rapids/cudf/CudaFatalException"; constexpr char const *CUDF_ERROR_CLASS = "ai/rapids/cudf/CudfException"; +constexpr char const *CUDF_DTYPE_ERROR_CLASS = "ai/rapids/cudf/CudfException"; constexpr char const *INDEX_OOB_CLASS = "java/lang/ArrayIndexOutOfBoundsException"; constexpr char const *ILLEGAL_ARG_CLASS = "java/lang/IllegalArgumentException"; constexpr char const *NPE_CLASS = "java/lang/NullPointerException"; @@ -861,6 +862,9 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) { catch (const cudf::cuda_error &e) { \ JNI_CHECK_CUDA_ERROR(env, cudf::jni::CUDA_ERROR_CLASS, e, ret_val); \ } \ + catch (const cudf::data_type_error &e) { \ + JNI_CHECK_THROW_NEW(env, cudf::jni::CUDF_DTYPE_ERROR_CLASS, e.what(), ret_val); \ + } \ catch (const std::exception &e) { \ /* Double check whether the thrown exception is unrecoverable CUDA error or not. */ \ /* Like cudf::detail::throw_cuda_error, it is nearly certain that a fatal error */ \ diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 4b785563484..f7d4f12ad81 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -62,6 +62,11 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf ) +# All modules need to include the header containing the exception handler. +foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_include_directories(${target} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) +endforeach() + target_link_libraries(strings_udf cudf_strings_udf) # TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd index bc89d364004..09e8538ebb7 100644 --- a/python/cudf/cudf/_lib/cpp/copying.pxd +++ b/python/cudf/cudf/_lib/cpp/copying.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
from libc.stdint cimport int32_t, int64_t, uint8_t from libcpp cimport bool @@ -14,6 +14,7 @@ from cudf._lib.cpp.scalar.scalar cimport scalar from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type +from cudf._lib.exception_handler cimport cudf_exception_handler ctypedef const scalar constscalar @@ -32,7 +33,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const table_view& source_table, const column_view& gather_map, out_of_bounds_policy policy - ) except + + ) except +cudf_exception_handler cdef unique_ptr[column] shift( const column_view& input, diff --git a/python/cudf/cudf/_lib/cpp/lists/contains.pxd b/python/cudf/cudf/_lib/cpp/lists/contains.pxd index e3cb01721a0..e86c73deed2 100644 --- a/python/cudf/cudf/_lib/cpp/lists/contains.pxd +++ b/python/cudf/cudf/_lib/cpp/lists/contains.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr @@ -6,20 +6,21 @@ from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view from cudf._lib.cpp.scalar.scalar cimport scalar +from cudf._lib.exception_handler cimport cudf_exception_handler cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] contains( lists_column_view lists, scalar search_key, - ) except + + ) except +cudf_exception_handler cdef unique_ptr[column] index_of( lists_column_view lists, scalar search_key, - ) except + + ) except +cudf_exception_handler cdef unique_ptr[column] index_of( lists_column_view lists, column_view search_keys, - ) except + + ) except +cudf_exception_handler diff --git a/python/cudf/cudf/_lib/exception_handler.hpp b/python/cudf/cudf/_lib/exception_handler.hpp new file mode 100644 index 00000000000..8daffddd7bd --- /dev/null +++ b/python/cudf/cudf/_lib/exception_handler.hpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace cudf_python { +namespace exceptions { + +/** + * @brief Exception handler to map C++ exceptions to Python ones in Cython + * + * This exception handler extends the base exception handler provided by + * Cython (https://github.com/cython/cython/blob/master/Cython/Utility/CppSupport.cpp#L9). + * In addition to the exceptions that Cython itself supports, this file adds support + * for additional exceptions thrown by libcudf that need to be mapped to specific Python + * exceptions. + * + * Since this function interoperates with Python's exception state, it does not throw + * any C++ exceptions. + */ +void cudf_exception_handler() +{ + // Catch a handful of different errors here and turn them into the + // equivalent Python errors. 
+ try { + if (PyErr_Occurred()) + ; // let the latest Python exn pass through and ignore the current one + else + throw; + } catch (const std::bad_alloc& exn) { + PyErr_SetString(PyExc_MemoryError, exn.what()); + } catch (const std::bad_cast& exn) { + PyErr_SetString(PyExc_TypeError, exn.what()); + } catch (const std::domain_error& exn) { + PyErr_SetString(PyExc_ValueError, exn.what()); + } catch (const cudf::data_type_error& exn) { + // Have to catch data_type_error before invalid_argument because it is a subclass + PyErr_SetString(PyExc_TypeError, exn.what()); + } catch (const std::invalid_argument& exn) { + PyErr_SetString(PyExc_ValueError, exn.what()); + } catch (const std::ios_base::failure& exn) { + // Unfortunately, in standard C++ we have no way of distinguishing EOF + // from other errors here; be careful with the exception mask + PyErr_SetString(PyExc_IOError, exn.what()); + } catch (const std::out_of_range& exn) { + // Change out_of_range to IndexError + PyErr_SetString(PyExc_IndexError, exn.what()); + } catch (const std::overflow_error& exn) { + PyErr_SetString(PyExc_OverflowError, exn.what()); + } catch (const std::range_error& exn) { + PyErr_SetString(PyExc_ArithmeticError, exn.what()); + } catch (const std::underflow_error& exn) { + PyErr_SetString(PyExc_ArithmeticError, exn.what()); + // The below is the default catch-all case. + } catch (const std::exception& exn) { + PyErr_SetString(PyExc_RuntimeError, exn.what()); + } catch (...) { + PyErr_SetString(PyExc_RuntimeError, "Unknown exception"); + } +} + +} // namespace exceptions +} // namespace cudf_python diff --git a/python/cudf/cudf/_lib/exception_handler.pxd b/python/cudf/cudf/_lib/exception_handler.pxd new file mode 100644 index 00000000000..14ac3bb1d40 --- /dev/null +++ b/python/cudf/cudf/_lib/exception_handler.pxd @@ -0,0 +1,5 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + + +cdef extern from "exception_handler.hpp" namespace "cudf_python::exceptions": + cdef void cudf_exception_handler() diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index fb1bcf6d673..b5f36aa3594 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2587,19 +2587,10 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: f"size > {libcudf.MAX_COLUMN_SIZE_STR}" ) elif newsize == 0: - col = column_empty(0, head.dtype, masked=True) - else: - # Filter out inputs that have 0 length, then concatenate. - objs = [o for o in objs if len(o)] - try: - col = libcudf.concat.concat_columns(objs) - except RuntimeError as e: - if "exceeds size_type range" in str(e): - raise OverflowError( - "total size of output is too large for a cudf column" - ) from e - raise - return col + return column_empty(0, head.dtype, masked=True) + + # Filter out inputs that have 0 length, then concatenate. + return libcudf.concat.concat_columns([o for o in objs if len(o)]) def _proxy_cai_obj(cai, owner): diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 157bc1f4291..96b8002e2a1 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. import warnings from decimal import Decimal @@ -79,32 +79,26 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str): # Binary Arithmetics between decimal columns.
`Scale` and `precision` # are computed outside of libcudf - unsupported_msg = ( - f"{op} not supported for the following dtypes: " - f"{self.dtype}, {other.dtype}" - ) - try: - if op in {"__add__", "__sub__", "__mul__", "__div__"}: - output_type = _get_decimal_type(lhs.dtype, rhs.dtype, op) - result = libcudf.binaryop.binaryop(lhs, rhs, op, output_type) - # TODO: Why is this necessary? Why isn't the result's - # precision already set correctly based on output_type? - result.dtype.precision = output_type.precision - elif op in { - "__eq__", - "__ne__", - "__lt__", - "__gt__", - "__le__", - "__ge__", - }: - result = libcudf.binaryop.binaryop(lhs, rhs, op, bool) - else: - raise TypeError(unsupported_msg) - except RuntimeError as e: - if "Unsupported operator for these types" in str(e): - raise TypeError(unsupported_msg) from e - raise + if op in {"__add__", "__sub__", "__mul__", "__div__"}: + output_type = _get_decimal_type(lhs.dtype, rhs.dtype, op) + result = libcudf.binaryop.binaryop(lhs, rhs, op, output_type) + # TODO: Why is this necessary? Why isn't the result's + # precision already set correctly based on output_type? + result.dtype.precision = output_type.precision + elif op in { + "__eq__", + "__ne__", + "__lt__", + "__gt__", + "__le__", + "__ge__", + }: + result = libcudf.binaryop.binaryop(lhs, rhs, op, bool) + else: + raise TypeError( + f"{op} not supported for the following dtypes: " + f"{self.dtype}, {other.dtype}" + ) return result diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 9b64f26f0fb..4eea64c00d3 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -406,19 +406,9 @@ def contains(self, search_key: ScalarLike) -> ParentType: Series([False, True, True]) dtype: bool """ - search_key = cudf.Scalar(search_key) - try: - res = self._return_or_inplace( - contains_scalar(self._column, search_key) - ) - except RuntimeError as e: - if ( - "Type/Scale of search key does not " - "match list column element type." in str(e) - ): - raise TypeError(str(e)) from e - raise - return res + return self._return_or_inplace( + contains_scalar(self._column, cudf.Scalar(search_key)) + ) def index(self, search_key: Union[ScalarLike, ColumnLike]) -> ParentType: """ @@ -465,23 +455,14 @@ def index(self, search_key: Union[ScalarLike, ColumnLike]) -> ParentType: dtype: int32 """ - try: - if is_scalar(search_key): - return self._return_or_inplace( - index_of_scalar(self._column, cudf.Scalar(search_key)) - ) - else: - return self._return_or_inplace( - index_of_column(self._column, as_column(search_key)) - ) - - except RuntimeError as e: - if ( - "Type/Scale of search key does not " - "match list column element type." in str(e) - ): - raise TypeError(str(e)) from e - raise + if is_scalar(search_key): + return self._return_or_inplace( + index_of_scalar(self._column, cudf.Scalar(search_key)) + ) + else: + return self._return_or_inplace( + index_of_column(self._column, as_column(search_key)) + ) @property def leaves(self) -> ParentType: @@ -577,16 +558,9 @@ def take(self, lists_indices: ColumnLike) -> ParentType: "lists_indices should be column of values of index types." 
) - try: - res = self._return_or_inplace( - segmented_gather(self._column, lists_indices_col) - ) - except RuntimeError as e: - if "contains nulls" in str(e): - raise ValueError("lists_indices contains null.") from e - raise - else: - return res + return self._return_or_inplace( + segmented_gather(self._column, lists_indices_col) + ) def unique(self) -> ParentType: """ @@ -720,16 +694,9 @@ def concat(self, dropna=True) -> ParentType: 1 [6.0, nan, 7.0, 8.0, 9.0] dtype: list """ - try: - result = concatenate_list_elements(self._column, dropna=dropna) - except RuntimeError as e: - if "Rows of the input column must be lists." in str(e): - raise ValueError( - "list.concat() can only be called on " - "list columns with at least one level " - "of nesting" - ) - return self._return_or_inplace(result) + return self._return_or_inplace( + concatenate_list_elements(self._column, dropna=dropna) + ) def astype(self, dtype): """ diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 8d6ffe48957..d5ef5fb5d11 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -2379,29 +2379,18 @@ def get_json_object( dtype: object """ - try: - options = libstrings.GetJsonObjectOptions( - allow_single_quotes=allow_single_quotes, - strip_quotes_from_single_strings=( - strip_quotes_from_single_strings - ), - missing_fields_as_nulls=missing_fields_as_nulls, - ) - res = self._return_or_inplace( - libstrings.get_json_object( - self._column, cudf.Scalar(json_path, "str"), options - ) - ) - except RuntimeError as e: - matches = ( - "Unrecognized JSONPath operator", - "Invalid empty name in JSONPath query string", + options = libstrings.GetJsonObjectOptions( + allow_single_quotes=allow_single_quotes, + strip_quotes_from_single_strings=( + strip_quotes_from_single_strings + ), + missing_fields_as_nulls=missing_fields_as_nulls, + ) + return self._return_or_inplace( + libstrings.get_json_object( + self._column, cudf.Scalar(json_path, "str"), options ) - if any(match in str(e) for match in matches): - raise ValueError("JSONPath value not found") from e - raise - else: - return res + ) def split( self, diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py index 7d113bbb9e2..a15afa727c0 100644 --- a/python/cudf/cudf/tests/test_column.py +++ b/python/cudf/cudf/tests/test_column.py @@ -520,10 +520,7 @@ def test_concatenate_large_column_strings(): s_1 = cudf.Series(["very long string " * string_scale_f] * num_strings) s_2 = cudf.Series(["very long string " * string_scale_f] * num_strings) - with pytest.raises( - OverflowError, - match="total size of output is too large for a cudf column", - ): + with pytest.raises(OverflowError): cudf.concat([s_1, s_2]) From ac1cac67839ceb2272e1eba151782cfaa744270a Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Sun, 26 Feb 2023 20:29:11 -0800 Subject: [PATCH 19/60] Add JNI methods for detecting and purging non-empty nulls from LIST and STRUCT (#12742) This PR adds methods for detecting and purging non-empty nulls. 
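For reference, a minimal usage sketch of the new API (the column construction below is only for illustration; just `hasNonEmptyNulls()` and `purgeNonEmptyNulls()` come from this PR):

```java
import java.util.Arrays;
import ai.rapids.cudf.*;

// Build a small LIST<INT32> column, then sanitize it only if needed.
try (ColumnVector lists = ColumnVector.fromLists(
        new HostColumnVector.ListType(true,
            new HostColumnVector.BasicType(true, DType.INT32)),
        Arrays.asList(0, 1), null, Arrays.asList(4, 5))) {
  if (lists.hasNonEmptyNulls()) {
    try (ColumnVector purged = lists.purgeNonEmptyNulls()) {
      // purged has the same rows, but null rows no longer hold data
    }
  }
}
```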
Authors: - Raza Jafri (https://github.com/razajafri) - Nghia Truong (https://github.com/ttnghia) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12742 --- .../main/java/ai/rapids/cudf/ColumnView.java | 37 +++++++++++ java/src/main/native/src/ColumnViewJni.cpp | 22 +++++++ .../java/ai/rapids/cudf/ColumnVectorTest.java | 62 +++++++++++++++++++ 3 files changed, 121 insertions(+) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 0cb9ed37d9f..84183819854 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -4639,6 +4639,10 @@ static native long makeCudfColumnView(int type, int scale, long data, long dataS static native long applyBooleanMask(long arrayColumnView, long booleanMaskHandle) throws CudfException; + static native boolean hasNonEmptyNulls(long handle) throws CudfException; + + static native long purgeNonEmptyNulls(long handle) throws CudfException; + /** * A utility class to create column vector like objects without refcounts and other APIs when * creating the device side vector from host side nested vectors. Eventually this can go away or @@ -4997,4 +5001,37 @@ public HostColumnVector copyToHost() { } } } + + /** + * Exact check if a column or its descendants have non-empty null rows + * + * @return Whether the column or its descendants have non-empty null rows + */ + public boolean hasNonEmptyNulls() { + return hasNonEmptyNulls(viewHandle); + } + + /** + * Copies this column into output while purging any non-empty null rows in the column or its + * descendants. + * + * If this column is not of compound type (LIST/STRING/STRUCT/DICTIONARY), the output will be + * the same as input. + * + * The purge operation only applies directly to LIST and STRING columns, but it applies indirectly + * to STRUCT/DICTIONARY columns as well, since these columns may have child columns that + * are LIST or STRING. + * + * Examples: + * lists = data: [{{0,1}, {2,3}, {4,5}} validity: {true, false, true}] + * lists[1] is null, but the list's child column still stores `{2,3}`. 
+ * + * After purging the contents of the list's null rows, the column's contents will be: + * lists = [data: {{0,1}, {4,5}} validity: {true, false, true}] + * + * @return A new column with equivalent contents to `input`, but with null rows purged + */ + public ColumnVector purgeNonEmptyNulls() { + return new ColumnVector(purgeNonEmptyNulls(viewHandle)); + } } diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index c42cc430560..f2c361c5e8c 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -2457,4 +2457,26 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_applyBooleanMask( CATCH_STD(env, 0); } +JNIEXPORT jboolean JNICALL +Java_ai_rapids_cudf_ColumnView_hasNonEmptyNulls(JNIEnv *env, jclass, jlong column_view_handle) { + JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); + try { + cudf::jni::auto_set_device(env); + auto const *cv = reinterpret_cast(column_view_handle); + return cudf::has_nonempty_nulls(*cv); + } + CATCH_STD(env, 0); +} + +JNIEXPORT jlong JNICALL +Java_ai_rapids_cudf_ColumnView_purgeNonEmptyNulls(JNIEnv *env, jclass, jlong column_view_handle) { + JNI_NULL_CHECK(env, column_view_handle, "column_view handle is null", 0); + try { + cudf::jni::auto_set_device(env); + auto const *cv = reinterpret_cast(column_view_handle); + return release_as_jlong(cudf::purge_nonempty_nulls(*cv)); + } + CATCH_STD(env, 0); +} + } // extern "C" diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 937077c89c9..7848807dab8 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -32,6 +32,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -6691,4 +6692,65 @@ void testApplyBooleanMaskFromListOfStructure() { assertColumnsAreEqual(expectedCv, actualCv); } } + + /** + * The caller needs to make sure to close the returned ColumnView + */ + private ColumnView[] getColumnViewWithNonEmptyNulls() { + List list0 = Arrays.asList(1, 2, 3); + List list1 = Arrays.asList(4, 5, null); + List list2 = Arrays.asList(7, 8, 9); + List list3 = null; + ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3); + // Modify the validity buffer + BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY); + try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) { + newValidity.copyFromDeviceBuffer(dmb); + BitVectorHelper.setNullAt(newValidity, 1); + dmb.copyFromHostBuffer(newValidity); + } + try (HostColumnVector hostColumnVector = input.copyToHost()) { + assert (hostColumnVector.isNull(1)); + assert (hostColumnVector.isNull(3)); + } + try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9)) { + ColumnView offsetsCvBeforePurge = input.getListOffsetsView(); + assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge); + } + ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb, + input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews()); + assertEquals(2, colWithNonEmptyNulls.nullCount); + return new ColumnView[]{input, colWithNonEmptyNulls}; + } + + @Test + void testPurgeNonEmptyNullsList() { + ColumnView[] values = 
getColumnViewWithNonEmptyNulls(); + try (ColumnView colWithNonEmptyNulls = values[1]; + ColumnView input = values[0]; + // purge non-empty nulls + ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls(); + ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); + ColumnView offsetsCvAfterPurge = colWithEmptyNulls.getListOffsetsView()) { + assertTrue(colWithNonEmptyNulls.hasNonEmptyNulls()); + assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); + assertFalse(colWithEmptyNulls.hasNonEmptyNulls()); + } + } + + @Test + void testPurgeNonEmptyNullsStruct() { + ColumnView[] values = getColumnViewWithNonEmptyNulls(); + try (ColumnView listCol = values[1]; + ColumnView input = values[0]; + ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings"); + ColumnView structView = ColumnView.makeStructView(stringsCol, listCol); + ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls(); + ColumnView newListChild = structWithEmptyNulls.getChildColumnView(1); + ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); + ColumnView offsetsCvAfterPurge = newListChild.getListOffsetsView()) { + assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); + assertFalse(newListChild.hasNonEmptyNulls()); + } + } } From 202578307c81b1f7aad8fbe6f71ca9c7f3fa5c4a Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 27 Feb 2023 13:46:45 -0500 Subject: [PATCH 20/60] Improve performance for cudf::strings::count_characters for long strings (#12779) Adds more efficient counting algorithm specifically for columns with long strings--greater than 64 bytes on average. The internal detail method will be used to help improve performance in other strings functions. Authors: - David Wendt (https://github.com/davidwendt) - Bradley Dice (https://github.com/bdice) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12779 --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/string/lengths.cpp | 56 +++++++++++++++++++ cpp/src/strings/attributes.cu | 91 +++++++++++++++++++++++++++++-- cpp/tests/strings/attrs_tests.cpp | 22 ++++++-- 4 files changed, 160 insertions(+), 11 deletions(-) create mode 100644 cpp/benchmarks/string/lengths.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index c5ae3345da5..11da30f108a 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -295,7 +295,7 @@ ConfigureBench( string/url_decode.cu ) -ConfigureNVBench(STRINGS_NVBENCH string/like.cpp string/reverse.cpp) +ConfigureNVBench(STRINGS_NVBENCH string/like.cpp string/reverse.cpp string/lengths.cpp) # ################################################################################################## # * json benchmark ------------------------------------------------------------------- diff --git a/cpp/benchmarks/string/lengths.cpp b/cpp/benchmarks/string/lengths.cpp new file mode 100644 index 00000000000..4540e4a8f42 --- /dev/null +++ b/cpp/benchmarks/string/lengths.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include + +static void bench_lengths(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const table_profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const table = + create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + // gather some throughput statistics as well + auto chars_size = input.chars_size(); + state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_writes(num_rows); // output is an integer per row + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::strings::count_characters(input); + }); +} + +NVBENCH_BENCH(bench_lengths) + .set_name("strings_lengths") + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}); diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu index 127d3aa8fe7..66288c7d14d 100644 --- a/cpp/src/strings/attributes.cu +++ b/cpp/src/strings/attributes.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -29,6 +31,7 @@ #include #include +#include #include #include #include @@ -37,10 +40,24 @@ #include #include +#include + namespace cudf { namespace strings { namespace detail { namespace { + +/** + * @brief Threshold to decide on using string or warp parallel functions. + * + * If the average byte length of a string in a column exceeds this value then + * the warp-parallel function is used. + * Otherwise, a regular string-parallel function is used. + * + * This value was found using the strings_lengths benchmark results. + */ +constexpr size_type AVG_CHAR_BYTES_THRESHOLD = 64; + /** * @brief Returns a numeric column containing lengths of each string in * based on the provided unary function. 
@@ -85,21 +102,85 @@ std::unique_ptr<column> counts_fn(strings_column_view const& strings, return results; } +/** + * @brief Count characters using a warp per string + * + * @param d_strings Column with strings to count + * @param d_lengths Results of the counts per string + */ +__global__ void count_characters_parallel_fn(column_device_view const d_strings, + size_type* d_lengths) +{ + size_type const idx = static_cast<size_type>(threadIdx.x + blockIdx.x * blockDim.x); + using warp_reduce = cub::WarpReduce<size_type>; + __shared__ typename warp_reduce::TempStorage temp_storage; + + if (idx >= (d_strings.size() * cudf::detail::warp_size)) { return; } + + auto const str_idx = idx / cudf::detail::warp_size; + auto const lane_idx = idx % cudf::detail::warp_size; + if (d_strings.is_null(str_idx)) { + d_lengths[str_idx] = 0; + return; + } + auto const d_str = d_strings.element<string_view>(str_idx); + auto const str_ptr = d_str.data(); + + auto count = 0; + for (auto i = lane_idx; i < d_str.size_bytes(); i += cudf::detail::warp_size) { + count += static_cast<size_type>(is_begin_utf8_char(str_ptr[i])); + } + auto const char_count = warp_reduce(temp_storage).Sum(count); + if (lane_idx == 0) { d_lengths[str_idx] = char_count; } +} + +std::unique_ptr<column> count_characters_parallel(strings_column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // create output column + auto results = make_numeric_column(data_type{type_to_id<size_type>()}, + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); + + auto const d_lengths = results->mutable_view().data<size_type>(); + auto const d_strings = cudf::column_device_view::create(input.parent(), stream); + + // fill in the lengths + constexpr int block_size = 256; + cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; + count_characters_parallel_fn<<<grid.num_blocks, block_size, 0, stream.value()>>>( + *d_strings, d_lengths); + + // reset null count after call to mutable_view() + results->set_null_count(input.null_count()); + + return results; +} + } // namespace -std::unique_ptr<column> count_characters(strings_column_view const& strings, +std::unique_ptr<column> count_characters(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto ufn = [] __device__(const string_view& d_str) { return d_str.length(); }; - return counts_fn(strings, ufn, stream, mr); + if ((input.size() == input.null_count()) || + ((input.chars_size() / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD)) { + auto ufn = [] __device__(string_view const& d_str) { return d_str.length(); }; + return counts_fn(input, ufn, stream, mr); + } + + return count_characters_parallel(input, stream, mr); } std::unique_ptr<column> count_bytes(strings_column_view const& strings, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto ufn = [] __device__(const string_view& d_str) { return d_str.size_bytes(); }; + auto ufn = [] __device__(string_view const& d_str) { return d_str.size_bytes(); }; return counts_fn(strings, ufn, stream, mr); } diff --git a/cpp/tests/strings/attrs_tests.cpp b/cpp/tests/strings/attrs_tests.cpp index 9ff2c55ed81..eff992604a6 100644 --- a/cpp/tests/strings/attrs_tests.cpp +++ b/cpp/tests/strings/attrs_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -65,7 +65,7 @@ TEST_F(StringsAttributesTest, ZeroSizeStringsColumn) TEST_F(StringsAttributesTest, StringsLengths) { std::vector<char const*> h_strings{ - "eee", "bb", nullptr, "", "aa", "ééé", " something a bit longer "}; + "eee", "bb", nullptr, "", "aa", "ééé", "something a bit longer than 32 bytes"}; cudf::test::strings_column_wrapper strings( h_strings.begin(), h_strings.end(), @@ -74,17 +74,16 @@ TEST_F(StringsAttributesTest, StringsLengths) { auto results = cudf::strings::count_characters(strings_view); - std::vector<int32_t> h_expected{3, 2, 0, 0, 2, 3, 24}; + std::vector<int32_t> h_expected{3, 2, 0, 0, 2, 3, 36}; cudf::test::fixed_width_column_wrapper<int32_t> expected( h_expected.begin(), h_expected.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } { auto results = cudf::strings::count_bytes(strings_view); - std::vector<int32_t> h_expected{3, 2, 0, 0, 2, 6, 24}; + std::vector<int32_t> h_expected{3, 2, 0, 0, 2, 6, 36}; cudf::test::fixed_width_column_wrapper<int32_t> expected( h_expected.begin(), h_expected.end(), @@ -93,3 +92,16 @@ TEST_F(StringsAttributesTest, StringsLengths) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } } + +TEST_F(StringsAttributesTest, StringsLengthsLong) +{ + std::vector<std::string> h_strings( + 40000, "something a bit longer than 32 bytes ééé ééé ééé ééé ééé ééé ééé"); + cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end()); + auto strings_view = cudf::strings_column_view(strings); + + auto results = cudf::strings::count_characters(strings_view); + std::vector<int32_t> h_expected(h_strings.size(), 64); + cudf::test::fixed_width_column_wrapper<int32_t> expected(h_expected.begin(), h_expected.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); +} From 9a91270b59f846cd4b0d6a1577183c6b333cdf57 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 27 Feb 2023 16:11:22 -0800 Subject: [PATCH 21/60] Remove tokenizers pre-install pinning. (#12854) We pinned tokenizers during early wheel development because, at the time, the latest version on PyPI was only a source distribution without wheels available for arm, and we didn't want to have to compile the package. More recent versions appear to provide aarch64 binaries. Meanwhile, the pinned version (0.10.2) predates the existence of Python 3.10 wheels, which now causes nightly CI to fail: Python 3.10 runs find no wheel and attempt to compile from source.
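(For future reference, and not part of this change: one way to check whether a binary wheel exists for a given platform is a binary-only `pip download`, which fails rather than falling back to the sdist. The target directory here is arbitrary.)

```sh
# Illustrative check only; all flags are standard pip options.
pip download tokenizers --only-binary=:all: \
    --platform manylinux2014_aarch64 --python-version 310 -d /tmp/wheels
```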
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/12854 --- .github/workflows/pr.yaml | 3 +-- .github/workflows/test.yaml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 3a80139e333..c36c539a102 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -113,8 +113,7 @@ jobs: build_type: pull-request package-name: cudf # Install cupy-cuda11x for arm from a special index url - # Install tokenizers last binary wheel to avoid a Rust compile from the latest sdist - test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-before-arm64: "pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" test-smoketest: "python ./ci/wheel_smoke_test_cudf.py" wheel-build-dask-cudf: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4d9e97a7b28..5dc04ece919 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -86,7 +86,7 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} package-name: cudf - test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-before-arm64: "pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" wheel-tests-dask-cudf: secrets: inherit From 8a45ae266641d96f30c9336abcd656d5290beaa1 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Tue, 28 Feb 2023 09:10:18 -0600 Subject: [PATCH 22/60] Make docs builds less verbose (#12836) This PR adds the `--no-progress` flag to reduce verbose output from the `s3 sync` commands. 
Authors: - Jake Awe (https://github.com/AyodeAwe) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/12836 --- ci/build_docs.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 9551d98e9fe..6daedb59733 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -41,7 +41,8 @@ popd if [[ ${RAPIDS_BUILD_TYPE} == "branch" ]]; then - aws s3 sync --delete cpp/doxygen/html "s3://rapidsai-docs/libcudf/${VERSION_NUMBER}/html" - aws s3 sync --delete docs/cudf/_html "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/html" - aws s3 sync --delete docs/cudf/_text "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/txt" + rapids-logger "Upload Docs to S3" + aws s3 sync --no-progress --delete cpp/doxygen/html "s3://rapidsai-docs/libcudf/${VERSION_NUMBER}/html" + aws s3 sync --no-progress --delete docs/cudf/_html "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/html" + aws s3 sync --no-progress --delete docs/cudf/_text "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/txt" fi From a7e50920f35c667d041678f6a82d975355cdfd8f Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 28 Feb 2023 19:35:20 +0100 Subject: [PATCH 23/60] Update RMM allocators (#12861) Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/12861 --- python/cudf/cudf/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 04b64e18594..7bab131a85a 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -8,6 +8,8 @@ from numba import config as numba_config, cuda import rmm +from rmm.allocators.cupy import rmm_cupy_allocator +from rmm.allocators.numba import RMMNumbaManager from cudf import api, core, datasets, testing from cudf.api.extensions import ( @@ -96,8 +98,8 @@ del patch_numba_linker_if_needed -cuda.set_memory_manager(rmm.RMMNumbaManager) -cupy.cuda.set_allocator(rmm.rmm_cupy_allocator) +cuda.set_memory_manager(RMMNumbaManager) +cupy.cuda.set_allocator(rmm_cupy_allocator) try: # Numba 0.54: Disable low occupancy warnings From afdb51bb90969e8f2b7ec75dd79d8fb6c437d461 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Thu, 2 Mar 2023 02:05:35 +0530 Subject: [PATCH 24/60] Adds JSON reader, writer io benchmark (#12753) - Add JSON writer benchmark. This benchmark is modeled after CSV writer. - Add JSON reader benchmark with file data source ([NESTED_JSON](https://github.com/rapidsai/cudf/blob/branch-23.04/cpp/benchmarks/io/json/nested_json.cpp?rgh-link-date=2023-02-08T22%3A43%3A38Z) only does parsing and only on device buffers). 
This benchmark is modeled after BM_csv_read_io. Fixes part of https://github.com/rapidsai/cudf/issues/12739 Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/12753 --- cpp/benchmarks/CMakeLists.txt | 3 +- cpp/benchmarks/io/json/json_reader_input.cpp | 128 +++++++++++++++++++ cpp/benchmarks/io/json/json_writer.cpp | 125 ++++++++++++++++++ cpp/src/io/json/write_json.cu | 4 + 4 files changed, 259 insertions(+), 1 deletion(-) create mode 100644 cpp/benchmarks/io/json/json_reader_input.cpp create mode 100644 cpp/benchmarks/io/json/json_writer.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 11da30f108a..e6b59c0b9f0 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -301,7 +301,8 @@ ConfigureNVBench(STRINGS_NVBENCH string/like.cpp string/reverse.cpp string/lengt # * json benchmark ------------------------------------------------------------------- ConfigureBench(JSON_BENCH string/json.cu) ConfigureNVBench(FST_NVBENCH io/fst.cu) -ConfigureNVBench(NESTED_JSON_NVBENCH io/json/nested_json.cpp) +ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader_input.cpp) +ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp) # ################################################################################################## # * io benchmark --------------------------------------------------------------------- diff --git a/cpp/benchmarks/io/json/json_reader_input.cpp b/cpp/benchmarks/io/json/json_reader_input.cpp new file mode 100644 index 00000000000..55614d040d5 --- /dev/null +++ b/cpp/benchmarks/io/json/json_reader_input.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include +#include +#include +#include + +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr size_t data_size = 512 << 20; constexpr cudf::size_type num_cols = 64; +void json_read_common(cudf::io::json_writer_options const& write_opts, + cuio_source_sink_pair& source_sink, + nvbench::state& state) +{ + cudf::io::write_json(write_opts); + + cudf::io::json_reader_options read_opts = + cudf::io::json_reader_options::builder(source_sink.make_source_info()); + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + cudf::io::read_json(read_opts); + timer.stop(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +template +void BM_json_read_io(nvbench::state& state, nvbench::type_list>) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + auto const source_type = IO; + + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::json_writer_options const write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view).na_rep("null"); + + json_read_common(write_opts, source_sink, state); +} + +template +void BM_json_read_data_type( + nvbench::state& state, nvbench::type_list, nvbench::enum_type>) +{ + auto const d_type = get_type_or_group(static_cast(DataType)); + auto const source_type = IO; + + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::json_writer_options const write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view).na_rep("null"); + + json_read_common(write_opts, source_sink, state); +} + +using d_type_list = nvbench::enum_type_list; + +using io_list = nvbench::enum_type_list; + +using compression_list = + nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_json_read_data_type, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) + .set_name("json_read_data_type") + .set_type_axes_names({"data_type", "io"}) + .set_min_samples(4); + +NVBENCH_BENCH_TYPES(BM_json_read_io, NVBENCH_TYPE_AXES(io_list)) + .set_name("json_read_io") + .set_type_axes_names({"io"}) + .set_min_samples(4); diff --git a/cpp/benchmarks/io/json/json_writer.cpp b/cpp/benchmarks/io/json/json_writer.cpp new file mode 100644 index 00000000000..ee183b327fe --- /dev/null +++ b/cpp/benchmarks/io/json/json_writer.cpp @@ -0,0 +1,125 @@ +/*
+ * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr size_t data_size = 512 << 20; +constexpr cudf::size_type num_cols = 64; + +void json_write_common(cudf::io::json_writer_options const& write_opts, + cuio_source_sink_pair& source_sink, + size_t const data_size, + nvbench::state& state) +{ + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + cudf::io::write_json(write_opts); + timer.stop(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +template +void BM_json_write_io(nvbench::state& state, nvbench::type_list>) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + auto const source_type = IO; + + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::json_writer_options write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view).na_rep("null"); + + json_write_common(write_opts, source_sink, data_size, state); +} + +void BM_json_writer_options(nvbench::state& state) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + auto const source_type = io_type::HOST_BUFFER; + bool const json_lines = state.get_int64("json_lines"); + bool const include_nulls = state.get_int64("include_nulls"); + auto const rows_per_chunk = state.get_int64("rows_per_chunk"); + + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::json_writer_options
write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view) + .na_rep("null") + .lines(json_lines) + .include_nulls(include_nulls) + .rows_per_chunk(rows_per_chunk); + + json_write_common(write_opts, source_sink, data_size, state); +} + +using io_list = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_json_write_io, NVBENCH_TYPE_AXES(io_list)) + .set_name("json_write_io") + .set_type_axes_names({"io"}) + .set_min_samples(4); + +NVBENCH_BENCH(BM_json_writer_options) + .set_name("json_write_options") + .set_min_samples(4) + .add_int64_axis("json_lines", {false, true}) + .add_int64_axis("include_nulls", {false, true}) + .add_int64_power_of_two_axis("rows_per_chunk", nvbench::range(10, 20, 2)); diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index a7ae4d3bdd1..b4bcb5548de 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -240,6 +240,7 @@ std::unique_ptr struct_to_strings(table_view const& strings_columns, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_FUNC_RANGE(); CUDF_EXPECTS(column_names.type().id() == type_id::STRING, "Column names must be of type string"); auto const num_columns = strings_columns.num_columns(); CUDF_EXPECTS(num_columns == column_names.size(), @@ -481,6 +482,7 @@ struct column_to_strings_fn { column_iterator column_end, host_span children_names) const { + CUDF_FUNC_RANGE(); auto const num_columns = std::distance(column_begin, column_end); auto column_names = make_column_names_column(children_names, num_columns, stream_); auto column_names_view = column_names->view(); @@ -590,6 +592,7 @@ void write_chunked(data_sink* out_sink, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_FUNC_RANGE(); CUDF_EXPECTS(str_column_view.size() > 0, "Unexpected empty strings column."); auto p_str_col_w_nl = cudf::strings::detail::join_strings(str_column_view, @@ -620,6 +623,7 @@ void write_json(data_sink* out_sink, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_FUNC_RANGE(); std::vector user_column_names = [&]() { auto const& metadata = options.get_metadata(); if (metadata.has_value() and not metadata->schema_info.empty()) { From 195e2f7b531ce4c5ff80ff5985dd82b7eae43134 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 1 Mar 2023 15:00:13 -0600 Subject: [PATCH 25/60] Fix docs build to be `pydata-sphinx-theme=0.13.0` compatible (#12874) This PR fixes cudf docs build to be compatible with `pydata-sphinx-theme=0.13.0` by adding an empty `icon_links` entry. 
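The workaround amounts to one extra entry in `conf.py` (abridged from the diff below):

```python
html_theme_options = {
    "external_links": [],
    # pydata-sphinx-theme 0.13.0 assumes this key exists; see
    # https://github.com/pydata/pydata-sphinx-theme/issues/1220
    "icon_links": [],
    # ... remaining options unchanged ...
}
```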
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Bradley Dice (https://github.com/bdice) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/12874 --- docs/cudf/source/conf.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 3d92d955263..b97879b0ae4 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -45,8 +45,8 @@ "myst_nb", ] -jupyter_execute_notebooks = "force" -execution_timeout = 300 +nb_execution_mode = "force" +nb_execution_timeout = 300 copybutton_prompt_text = ">>> " autosummary_generate = True @@ -103,6 +103,8 @@ html_theme_options = { "external_links": [], + # https://github.com/pydata/pydata-sphinx-theme/issues/1220 + "icon_links": [], "github_url": "https://github.com/rapidsai/cudf", "twitter_url": "https://twitter.com/rapidsai", "show_toc_level": 1, From 40e56c94e8450603a169546faad36397f2aef8aa Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 1 Mar 2023 15:32:54 -0800 Subject: [PATCH 26/60] Parquet writer column_size() should return a size_t (#12870) Fixes #12867. Bug introduced in #12685. A calculation of total bytes in a column was returned in a 32-bit `size_type` rather than 64-bit `size_t` leading to overflow for tables with many millions of rows. Authors: - Ed Seidl (https://github.com/etseidl) - Vukasin Milovanovic (https://github.com/vuule) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Karthikeyan (https://github.com/karthikeyann) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/12870 --- cpp/src/io/parquet/writer_impl.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 88176ee1901..2c9bff33a14 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -87,7 +87,7 @@ parquet::Compression to_parquet_compression(compression_type compression) } } -size_type column_size(column_view const& column, rmm::cuda_stream_view stream) +size_t column_size(column_view const& column, rmm::cuda_stream_view stream) { if (column.size() == 0) { return 0; } @@ -99,7 +99,7 @@ size_type column_size(column_view const& column, rmm::cuda_stream_view stream) cudf::detail::get_value(scol.offsets(), 0, stream); } else if (column.type().id() == type_id::STRUCT) { auto const scol = structs_column_view(column); - size_type ret = 0; + size_t ret = 0; for (int i = 0; i < scol.num_children(); i++) { ret += column_size(scol.get_sliced_child(i), stream); } From 8747daa66d2b181c5c95d72998d78ab00a5daf60 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 2 Mar 2023 11:31:18 -0800 Subject: [PATCH 27/60] Use python -m pytest for nightly wheel tests (#12871) Nightly wheel tests are failing with this error: ``` + sh -c 'pytest -v -n 8 ./python/cudf/cudf/tests' sh: 1: pytest: not found ``` However, pytest is being installed. The wheel itself is being installed with `python -m pip`, like: ``` python -m pip install --verbose './dist/cudf_cu11-23.4.0.1677647948-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl[test]' ``` Calling `python -m pytest` may resolve this error. For consistency, we should use `python -m` for all executable modules (pip and pytest). 
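In other words, the invocation pattern becomes (a sketch of the pattern; the real commands live in the workflow files below):

```sh
# Install the wheel and run the tests through the same interpreter, so the
# pytest that runs is the one installed alongside the wheel's test extras.
python -m pip install './dist/cudf_cu11-*.whl[test]'
python -m pytest -v -n 8 ./python/cudf/cudf/tests
```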
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Jordan Jacobelli (https://github.com/jjacobelli) URL: https://github.com/rapidsai/cudf/pull/12871 --- .github/workflows/pr.yaml | 10 +++++----- .github/workflows/test.yaml | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index c36c539a102..d02825b73d1 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -113,8 +113,8 @@ jobs: build_type: pull-request package-name: cudf # Install cupy-cuda11x for arm from a special index url - test-before-arm64: "pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" + test-before-arm64: "python -m pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-unittest: "python -m pytest -v -n 8 ./python/cudf/cudf/tests" test-smoketest: "python ./ci/wheel_smoke_test_cudf.py" wheel-build-dask-cudf: needs: wheel-tests-cudf @@ -124,7 +124,7 @@ jobs: build_type: pull-request package-name: dask_cudf package-dir: python/dask_cudf - before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && pip install --no-deps ./local-cudf/cudf*.whl" + before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && python -m pip install --no-deps ./local-cudf/cudf*.whl" uses-setup-env-vars: false wheel-tests-dask-cudf: needs: wheel-build-dask-cudf @@ -133,5 +133,5 @@ jobs: with: build_type: pull-request package-name: dask_cudf - test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && pip install --no-deps ./local-cudf-dep/cudf*.whl" - test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" + test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && python -m pip install --no-deps ./local-cudf-dep/cudf*.whl" + test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 5dc04ece919..c808e1475e6 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -86,8 +86,8 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} package-name: cudf - test-before-arm64: "pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" + test-before-arm64: "python -m pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-unittest: "python -m pytest -v -n 8 ./python/cudf/cudf/tests" wheel-tests-dask-cudf: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04 @@ -97,4 +97,4 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} package-name: dask_cudf - test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" + test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" From 00c887b143a805b8f967eb91d20d2f651c193e28 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 3 Mar 2023 16:21:13 -0800 Subject: [PATCH 28/60] Deallocate encoded data in ORC writer immediately after compression (#12770) Reduces total memory footprint over the execution time (but not peak memory usage, which happens during compression). This change helps overall memory usage when writing multiple files in parallel using the same GPU. 
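The release mechanism is the usual RMM idiom of move-assigning an empty `device_uvector`, which frees the previous allocation immediately instead of at end of scope; a self-contained sketch:

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

void encode_then_compress(rmm::cuda_stream_view stream)
{
  rmm::device_uvector<uint8_t> encoded(1 << 20, stream);
  // ... fill `encoded`, then compress it into a separate buffer ...
  // Drop the encoded bytes as soon as compression has consumed them:
  encoded = rmm::device_uvector<uint8_t>{0, stream};
}
```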
Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/12770 --- cpp/src/io/orc/writer_impl.cu | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index a6effeefc6c..8d85b001817 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -2246,6 +2246,10 @@ void writer::impl::write(table_view const& table) enc_data.streams, comp_results, stream); + + // deallocate encoded data as it is not needed anymore + enc_data.data = rmm::device_uvector{0, stream}; + strm_descs.device_to_host(stream); comp_results.device_to_host(stream, true); } From 2689bb698e8e8e11659d599172db8cbc36b76e0f Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Fri, 3 Mar 2023 17:03:05 -0800 Subject: [PATCH 29/60] Throw an exception if an unsupported page encoding is detected in Parquet reader (#12754) If the Parquet reader comes across a page encoded with an unsupported encoding, the call to decode page data silently fails, leading to either an empty table or unrelated exceptions being thrown. This PR adds code to validate the page encodings after the page headers are decoded. Authors: - Ed Seidl (https://github.com/etseidl) - Vukasin Milovanovic (https://github.com/vuule) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - MithunR (https://github.com/mythrocks) - Nghia Truong (https://github.com/ttnghia) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/12754 --- cpp/src/io/parquet/page_data.cu | 2 ++ cpp/src/io/parquet/parquet_common.hpp | 5 +++-- cpp/src/io/parquet/reader_impl_preprocess.cu | 18 ++++++++++++++++++ .../tests/data/parquet/delta_encoding.parquet | Bin 0 -> 577 bytes python/cudf/cudf/tests/test_parquet.py | 8 ++++++++ 5 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 python/cudf/cudf/tests/data/parquet/delta_encoding.parquet diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index ee115e7432a..f377e833645 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1167,6 +1167,8 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, s->dict_bits = 0; s->dict_base = nullptr; s->dict_size = 0; + // NOTE: if additional encodings are supported in the future, modifications must + // be made to is_supported_encoding() in reader_impl_preprocess.cu switch (s->page.encoding) { case Encoding::PLAIN_DICTIONARY: case Encoding::RLE_DICTIONARY: diff --git a/cpp/src/io/parquet/parquet_common.hpp b/cpp/src/io/parquet/parquet_common.hpp index d56f2fb08ca..ab6290c4ed6 100644 --- a/cpp/src/io/parquet/parquet_common.hpp +++ b/cpp/src/io/parquet/parquet_common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -86,11 +86,12 @@ enum class Encoding : uint8_t { GROUP_VAR_INT = 1, // Deprecated, never used PLAIN_DICTIONARY = 2, RLE = 3, - BIT_PACKED = 4, + BIT_PACKED = 4, // Deprecated by parquet-format in 2013, superseded by RLE DELTA_BINARY_PACKED = 5, DELTA_LENGTH_BYTE_ARRAY = 6, DELTA_BYTE_ARRAY = 7, RLE_DICTIONARY = 8, + BYTE_STREAM_SPLIT = 9, }; /** diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 0f55cd6e400..6b6ad5a2f7d 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -307,6 +307,18 @@ template return total_pages; } +// see setupLocalPageInfo() in page_data.cu for supported page encodings +constexpr bool is_supported_encoding(Encoding enc) +{ + switch (enc) { + case Encoding::PLAIN: + case Encoding::PLAIN_DICTIONARY: + case Encoding::RLE: + case Encoding::RLE_DICTIONARY: return true; + default: return false; + } +} + /** * @brief Decode the page information from the given column chunks. * @@ -329,6 +341,12 @@ void decode_page_headers(hostdevice_vector<gpu::ColumnChunkDesc>& chunks, chunks.host_to_device(stream); gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream); pages.device_to_host(stream, true); + + // validate page encodings + CUDF_EXPECTS(std::all_of(pages.begin(), + pages.end(), + [](auto const& page) { return is_supported_encoding(page.encoding); }), + "Unsupported page encoding detected"); } /** diff --git a/python/cudf/cudf/tests/data/parquet/delta_encoding.parquet b/python/cudf/cudf/tests/data/parquet/delta_encoding.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e129ced34f3b570ba0ae966277f2111f8f539465 GIT binary patch literal 577 [base85 binary data omitted] From: Bradley Dice Date: Sat, 4 Mar 2023 13:40:36 -0800 Subject: [PATCH 30/60] Use test paths relative to package directory. (#12751) This PR standardizes how our test paths are defined for dask-cudf and custreamz. This was originally a test to investigate a question related to CodeCov, which is answered below. We noticed that some coverage reports in the CI outputs used full paths like `/opt/conda/envs/test/lib/python3.10/site-packages/cudf/__init__.py` while other coverage reports gave relative paths like `cudf/__init__.py`. The net result of this PR is that coverage reports are unchanged, but our CI tests are more correct because they always pull from the installed packages rather than the local packages (`import dask_cudf` is fetching the built conda packages from `site-packages` and not importing from the current path in the cloned repo).
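Concretely, the trick is to run pytest from inside the package directory (illustrative sketch of the pattern used below):

```sh
# With the cwd inside the package source itself, `import dask_cudf` cannot
# resolve to the repo checkout, so the copy installed in site-packages is
# what actually gets imported and measured.
pushd python/dask_cudf/dask_cudf
pytest --cov=dask_cudf --cov-config=../.coveragerc tests
popd
```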
Authors: - Bradley Dice (https://github.com/bdice) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Jordan Jacobelli (https://github.com/jjacobelli) URL: https://github.com/rapidsai/cudf/pull/12751 --- ci/test_python_other.sh | 12 ++++++------ python/custreamz/.coveragerc | 3 +++ 2 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 python/custreamz/.coveragerc diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh index 25ed615df84..ab36fbbb5ff 100755 --- a/ci/test_python_other.sh +++ b/ci/test_python_other.sh @@ -17,31 +17,31 @@ trap "EXITCODE=1" ERR set +e rapids-logger "pytest dask_cudf" -pushd python/dask_cudf +pushd python/dask_cudf/dask_cudf pytest \ --cache-clear \ --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \ --numprocesses=8 \ --dist=loadscope \ - --cov-config=.coveragerc \ + --cov-config=../.coveragerc \ --cov=dask_cudf \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \ --cov-report=term \ - dask_cudf + tests popd rapids-logger "pytest custreamz" -pushd python/custreamz +pushd python/custreamz/custreamz pytest \ --cache-clear \ --junitxml="${RAPIDS_TESTS_DIR}/junit-custreamz.xml" \ --numprocesses=8 \ --dist=loadscope \ - --cov-config=.coveragerc \ + --cov-config=../.coveragerc \ --cov=custreamz \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \ --cov-report=term \ - custreamz + tests popd rapids-logger "Test script exiting with value: $EXITCODE" diff --git a/python/custreamz/.coveragerc b/python/custreamz/.coveragerc new file mode 100644 index 00000000000..26bed7816ed --- /dev/null +++ b/python/custreamz/.coveragerc @@ -0,0 +1,3 @@ +# Configuration file for Python coverage tests +[run] +source = custreamz From 77b5014200ad6415e06e43216db477cafeb07c63 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 6 Mar 2023 12:39:17 -0500 Subject: [PATCH 31/60] Expect cupy to now support bool arrays for dlpack. (#12883) The most recent versions of cupy now support bool arrays in dlpack (see [the release notes](https://github.com/cupy/cupy/releases/tag/v11.6.0) and [the PR adding the feature](https://github.com/cupy/cupy/pull/7376)). Our DataFrame protocol tests were expecting a failure. 
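As a hedged sketch of the behavior the updated test now relies on (requires cupy>=11.6 per the release notes above):

```python
# Bool arrays now round-trip through the DLPack protocol instead of raising.
import cupy as cp

mask = cp.array([True, False, True])
roundtrip = cp.from_dlpack(mask)  # zero-copy exchange via __dlpack__
assert bool((mask == roundtrip).all())
```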
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/12883 --- python/cudf/cudf/tests/test_df_protocol.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py index 7dbca90ab03..8a53c77da66 100644 --- a/python/cudf/cudf/tests/test_df_protocol.py +++ b/python/cudf/cudf/tests/test_df_protocol.py @@ -39,15 +39,12 @@ def assert_buffer_equal(buffer_and_dtype: Tuple[_CuDFBuffer, Any], cudfcol): cudfcol.apply_boolean_mask(non_null_idxs), ) - if dtype[0] != _DtypeKind.BOOL: - array_from_dlpack = cp.from_dlpack(buf.__dlpack__()).get() - col_array = cp.asarray(cudfcol.data_array_view(mode="read")).get() - assert_eq( - array_from_dlpack[non_null_idxs.to_numpy()].flatten(), - col_array[non_null_idxs.to_numpy()].flatten(), - ) - else: - pytest.raises(TypeError, buf.__dlpack__) + array_from_dlpack = cp.from_dlpack(buf.__dlpack__()).get() + col_array = cp.asarray(cudfcol.data_array_view(mode="read")).get() + assert_eq( + array_from_dlpack[non_null_idxs.to_numpy()].flatten(), + col_array[non_null_idxs.to_numpy()].flatten(), + ) def assert_column_equal(col: _CuDFColumn, cudfcol): From b6d9fd1f26ccda9b3043a8967621bae234187c37 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 6 Mar 2023 10:04:08 -0800 Subject: [PATCH 32/60] Split C++ and Python build dependencies into separate lists. (#12724) This PR splits the build dependencies in `dependencies.yaml` into C++, Python, and shared (`all`) dependency lists. This makes it easier to understand the dependency tree for libcudf and cudf Python. 
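For illustration, a minimal sketch (assuming PyYAML is available) of how the new split can be inspected: it prints which dependency lists each output file in `dependencies.yaml` pulls in, making the `build_all`/`build_cpp`/`build_python` separation visible at a glance.

```python
# List the dependency groups included by each generated file.
import yaml

with open("dependencies.yaml") as f:
    config = yaml.safe_load(f)

for file_key, spec in config["files"].items():
    print(f"{file_key}: {', '.join(spec['includes'])}")
```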
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Ray Douglass (https://github.com/raydouglass) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/12724 --- .../all_cuda-118_arch-x86_64.yaml | 1 + dependencies.yaml | 35 +++++++++++++------ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 44d6be65574..00aa1e2bc0f 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -33,6 +33,7 @@ dependencies: - ipython - libarrow=10 - librdkafka=1.7.0 +- librmm=23.04.* - mimesis>=4.1.0 - moto>=4.0.8 - myst-nb diff --git a/dependencies.yaml b/dependencies.yaml index ba6d240e069..2c359d58378 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -6,7 +6,9 @@ files: cuda: ["11.8"] arch: [x86_64] includes: - - build + - build_all + - build_cpp + - build_python - cudatoolkit - develop - docs @@ -30,7 +32,7 @@ files: test_java: output: none includes: - - build + - build_all - cudatoolkit - test_java test_notebooks: @@ -57,25 +59,18 @@ channels: - conda-forge - nvidia dependencies: - build: + build_all: common: - output_types: [conda, requirements] packages: - &cmake_ver cmake>=3.23.1,!=3.25.0 - - cuda-python>=11.7.1,<12.0 - - cython>=0.29,<0.30 - dlpack>=0.5,<0.6.0a0 - ninja - - pyarrow=10 - - rmm=23.04.* - - scikit-build>=0.13.1 - output_types: conda packages: - libarrow=10 - c-compiler - cxx-compiler - - librdkafka=1.7.0 - - protobuf=4.21 specific: - output_types: conda matrices: @@ -101,6 +96,26 @@ dependencies: cuda: "11.8" packages: - nvcc_linux-aarch64=11.8 + build_cpp: + common: + - output_types: [conda, requirements] + packages: + - librmm=23.04.* + - output_types: conda + packages: + - librdkafka=1.7.0 + build_python: + common: + - output_types: [conda, requirements] + packages: + - cuda-python>=11.7.1,<12.0 + - cython>=0.29,<0.30 + - pyarrow=10 + - rmm=23.04.* + - scikit-build>=0.13.1 + - output_types: conda + packages: + - protobuf=4.21 cudatoolkit: specific: - output_types: conda From 39398e4dedb0b2537e07d4aea1f52ac23ea8108b Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 6 Mar 2023 13:34:21 -0600 Subject: [PATCH 33/60] Deprecate `datetime_is_numeric` from `describe` (#12818) `datetime_is_numeric` is a non-functional parameter in `cudf` and has been removed in `pandas 2.0`, hence this PR deprecates the parameter ahead of its removal.
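A short sketch of the user-facing change (mirroring the warnings added in this PR):

```python
import cudf

s = cudf.Series([1, 2, 3])
s.describe(datetime_is_numeric=True)  # adopts the future behavior, no warning
s.describe()  # now emits a FutureWarning for the deprecated default
```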
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12818 --- .../developer_guide/contributing_guide.md | 8 ++++++++ python/cudf/cudf/core/dataframe.py | 12 +++++++++++- python/cudf/cudf/core/series.py | 8 ++++++++ python/cudf/cudf/tests/test_dataframe.py | 18 ++++++++++++------ python/cudf/cudf/tests/test_series.py | 12 ++++++++---- python/cudf/cudf/utils/docutils.py | 7 ++++++- 6 files changed, 53 insertions(+), 12 deletions(-) diff --git a/docs/cudf/source/developer_guide/contributing_guide.md b/docs/cudf/source/developer_guide/contributing_guide.md index b5ea9519842..3244959b14c 100644 --- a/docs/cudf/source/developer_guide/contributing_guide.md +++ b/docs/cudf/source/developer_guide/contributing_guide.md @@ -67,6 +67,14 @@ Deprecations should be signaled using a `FutureWarning` **not a `DeprecationWarn `DeprecationWarning` is hidden by default except in code run in the `__main__` module. ``` +Deprecations should also be specified in the respective public API docstring using a +`deprecated` admonition: + +``` +.. deprecated:: 23.08 + `foo` is deprecated and will be removed in a future version of cudf. +``` + ## `pandas` compatibility Maintaining compatibility with the [pandas API](https://pandas.pydata.org/docs/reference/index.html) is a primary goal of cuDF. diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index d43621d3d36..fd6e9e2687d 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4937,6 +4937,13 @@ def describe( default_include = [np.number] if datetime_is_numeric: default_include.append("datetime") + else: + warnings.warn( + "`datetime_is_numeric` is deprecated. Specify " + "`datetime_is_numeric=True` to silence this " + "warning and adopt the future behavior now.", + FutureWarning, + ) data_to_describe = self.select_dtypes(include=default_include) if data_to_describe._num_columns == 0: data_to_describe = self @@ -4955,7 +4962,10 @@ def describe( raise ValueError("No data of included types.") describe_series_list = [ - data_to_describe[col].describe(percentiles=percentiles) + data_to_describe[col].describe( + percentiles=percentiles, + datetime_is_numeric=datetime_is_numeric, + ) for col in data_to_describe._column_names ] if len(describe_series_list) == 1: diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 60655c5a6f9..79927c60a85 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6,6 +6,7 @@ import inspect import pickle import textwrap +import warnings from collections import abc from shutil import get_terminal_size from typing import Any, Dict, MutableMapping, Optional, Set, Tuple, Union @@ -3111,6 +3112,13 @@ def describe( ): """{docstring}""" + if not datetime_is_numeric: + warnings.warn( + "`datetime_is_numeric` is deprecated and will be removed in " + "a future release. 
Specify `datetime_is_numeric=True` to " + "silence this warning and adopt the future behavior now.", + FutureWarning, + ) if percentiles is not None: if not all(0 <= x <= 1 for x in percentiles): raise ValueError( diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 13f312f6f0c..7ddfa3a7f48 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3656,7 +3656,8 @@ def test_dataframe_describe_exclude(): df["x"] = df.x.astype("int64") df["y"] = np.random.normal(10, 1, data_length) pdf = df.to_pandas() - gdf_results = df.describe(exclude=["float"]) + with pytest.warns(FutureWarning): + gdf_results = df.describe(exclude=["float"]) pdf_results = pdf.describe(exclude=["float"]) assert_eq(gdf_results, pdf_results) @@ -3671,7 +3672,8 @@ def test_dataframe_describe_include(): df["x"] = df.x.astype("int64") df["y"] = np.random.normal(10, 1, data_length) pdf = df.to_pandas() - gdf_results = df.describe(include=["int"]) + with pytest.warns(FutureWarning): + gdf_results = df.describe(include=["int"]) pdf_results = pdf.describe(include=["int"]) assert_eq(gdf_results, pdf_results) @@ -3685,7 +3687,8 @@ def test_dataframe_describe_default(): df["x"] = np.random.normal(10, 1, data_length) df["y"] = np.random.normal(10, 1, data_length) pdf = df.to_pandas() - gdf_results = df.describe() + with pytest.warns(FutureWarning): + gdf_results = df.describe() pdf_results = pdf.describe() assert_eq(pdf_results, gdf_results) @@ -3702,7 +3705,8 @@ def test_series_describe_include_all(): df["animal"] = np.random.choice(["dog", "cat", "bird"], data_length) pdf = df.to_pandas() - gdf_results = df.describe(include="all") + with pytest.warns(FutureWarning): + gdf_results = df.describe(include="all") pdf_results = pdf.describe(include="all") assert_eq(gdf_results[["x", "y"]], pdf_results[["x", "y"]]) @@ -3723,7 +3727,8 @@ def test_dataframe_describe_percentiles(): df["x"] = np.random.normal(10, 1, data_length) df["y"] = np.random.normal(10, 1, data_length) pdf = df.to_pandas() - gdf_results = df.describe(percentiles=sample_percentiles) + with pytest.warns(FutureWarning): + gdf_results = df.describe(percentiles=sample_percentiles) pdf_results = pdf.describe(percentiles=sample_percentiles) assert_eq(pdf_results, gdf_results) @@ -4050,7 +4055,8 @@ def test_empty_dataframe_describe(): gdf = cudf.from_pandas(pdf) expected = pdf.describe() - actual = gdf.describe() + with pytest.warns(FutureWarning): + actual = gdf.describe() assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index b3c7c9ac9bb..eb05468d923 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -408,7 +408,8 @@ def test_series_size(data): def test_series_describe_numeric(dtype): ps = pd.Series([0, 1, 2, 3, 1, 2, 3], dtype=dtype) gs = cudf.from_pandas(ps) - actual = gs.describe() + with pytest.warns(FutureWarning): + actual = gs.describe() expected = ps.describe() assert_eq(expected, actual, check_dtype=True) @@ -426,7 +427,8 @@ def test_series_describe_datetime(dtype): # Treating datetimes as categoricals is deprecated in pandas and will # be removed in future. Future behavior is treating datetime as numeric. 
expected = ps.describe(datetime_is_numeric=True) - actual = gs.describe() + with pytest.warns(FutureWarning): + actual = gs.describe() assert_eq(expected.astype("str"), actual) @@ -437,7 +439,8 @@ def test_series_describe_timedelta(dtype): gs = cudf.from_pandas(ps) expected = ps.describe() - actual = gs.describe() + with pytest.warns(FutureWarning): + actual = gs.describe() assert_eq(actual, expected.astype("str")) @@ -462,7 +465,8 @@ def test_series_describe_other_types(ps): gs = cudf.from_pandas(ps) expected = ps.describe() - actual = gs.describe() + with pytest.warns(FutureWarning): + actual = gs.describe() if len(ps) == 0: assert_eq(expected.fillna("a").astype("str"), actual.fillna("a")) diff --git a/python/cudf/cudf/utils/docutils.py b/python/cudf/cudf/utils/docutils.py index 72ebae05b6f..5a7b8bae980 100644 --- a/python/cudf/cudf/utils/docutils.py +++ b/python/cudf/cudf/utils/docutils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. """ Helper functions for parameterized docstring @@ -130,6 +130,11 @@ def wrapper(func): For DataFrame input, this also controls whether datetime columns are included by default. + .. deprecated:: 23.04 + + `datetime_is_numeric` is deprecated and will be removed in + a future version of cudf. + Returns ------- output_frame : Series or DataFrame From 7dade51f8636f00153b0c0f190dbbb352c5b309b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 6 Mar 2023 14:45:55 -0500 Subject: [PATCH 34/60] Migrate as much as possible to pyproject.toml (#12850) I migrated as much build logic as possible to pyproject.toml for cudf, dask_cudf, and custreamz. A couple of notes: - I didn't move cudf_kafka because it is currently still using a pure setuptools build with Cython files. We may as well do the pyproject.toml migration there after we have also done the scikit-build conversion since there's a lot of logic that can't be removed from setup.py until then. I'm also not entirely confident that everything will work as expected if we move data to pyproject.toml without switching to scikit-build based on issues like the ones mentioned in [this SO post about Cython and project data stored in pyproject.toml](https://stackoverflow.com/questions/73800736/pyproject-toml-and-cython-extension-module). - I could get rid of setup.py entirely for custreamz at this point, but I expect that we'll run into a host of additional issues when we do eventually move to requiring `pip install X` instead of `python setup.py X` across RAPIDS and I'd rather deal with all of those at the same time with our main packages rather than using an esoteric package like custreamz as the test case. 
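A hedged sanity check (assuming the package is installed): metadata declared in `pyproject.toml` is exposed to consumers exactly as the old `setup.py` keywords were, so downstream tooling should see no difference.

```python
# importlib.metadata (Python 3.8+) reads the installed distribution's
# metadata regardless of whether it came from setup.py or pyproject.toml.
from importlib import metadata

print(metadata.version("cudf"))              # e.g. 23.04.00
print(metadata.metadata("cudf")["License"])  # Apache 2.0
```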
Authors: - Vyas Ramasubramani (https://github.com/vyasr) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Ray Douglass (https://github.com/raydouglass) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/12850 --- ci/release/apply_wheel_modifications.sh | 25 +++++------ ci/release/update-version.sh | 15 +++---- python/cudf/MANIFEST.in | 16 +++++++ python/cudf/README.md | 1 + python/cudf/pyproject.toml | 60 +++++++++++++++++++++++++ python/cudf/setup.py | 56 ----------------------- python/custreamz/LICENSE | 1 + python/custreamz/pyproject.toml | 47 ++++++++++++++++++- python/custreamz/setup.py | 30 +------------ python/dask_cudf/pyproject.toml | 50 ++++++++++++++++++++- python/dask_cudf/setup.cfg | 5 --- python/dask_cudf/setup.py | 45 +++---------------- 12 files changed, 198 insertions(+), 153 deletions(-) create mode 100644 python/cudf/MANIFEST.in create mode 120000 python/cudf/README.md create mode 120000 python/custreamz/LICENSE delete mode 100644 python/dask_cudf/setup.cfg diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh index e017b24be6e..9d9758f1f15 100755 --- a/ci/release/apply_wheel_modifications.sh +++ b/ci/release/apply_wheel_modifications.sh @@ -12,22 +12,19 @@ sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/dask_cudf/dask sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf_kafka/cudf_kafka/__init__.py sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/custreamz/custreamz/__init__.py -# setup.py versions -sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/cudf/setup.py -sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/dask_cudf/setup.py -sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/cudf_kafka/setup.py -sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/custreamz/setup.py - -# cudf setup.py cuda suffixes -sed -i "s/name=\"cudf\"/name=\"cudf${CUDA_SUFFIX}\"/g" python/cudf/setup.py -sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/cudf/setup.py -sed -i "s/ptxcompiler/ptxcompiler${CUDA_SUFFIX}/g" python/cudf/setup.py -sed -i "s/cubinlinker/cubinlinker${CUDA_SUFFIX}/g" python/cudf/setup.py +# pyproject.toml versions +sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf/pyproject.toml +sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/dask_cudf/pyproject.toml +sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf_kafka/pyproject.toml +sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/custreamz/pyproject.toml # cudf pyproject.toml cuda suffixes +sed -i "s/^name = \"cudf\"/name = \"cudf${CUDA_SUFFIX}\"/g" python/cudf/pyproject.toml sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/cudf/pyproject.toml +sed -i "s/ptxcompiler/ptxcompiler${CUDA_SUFFIX}/g" python/cudf/pyproject.toml +sed -i "s/cubinlinker/cubinlinker${CUDA_SUFFIX}/g" python/cudf/pyproject.toml -# dask_cudf setup.py cuda suffixes -sed -i "s/name=\"dask-cudf\"/name=\"dask-cudf${CUDA_SUFFIX}\"/g" python/dask_cudf/setup.py +# dask_cudf pyproject.toml cuda suffixes +sed -i "s/^name = \"dask_cudf\"/name = \"dask_cudf${CUDA_SUFFIX}\"/g" python/dask_cudf/pyproject.toml # Need to provide the == to avoid modifying the URL -sed -i "s/\"cudf==/\"cudf${CUDA_SUFFIX}==/g" python/dask_cudf/setup.py +sed -i "s/\"cudf==/\"cudf${CUDA_SUFFIX}==/g" python/dask_cudf/pyproject.toml diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 831b91bb2a6..e5c9ba0569f 100755 --- 
a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -49,11 +49,11 @@ sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/dask sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/cudf_kafka/__init__.py sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/custreamz/custreamz/__init__.py -# Python setup.py updates -sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/cudf/setup.py -sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/dask_cudf/setup.py -sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/cudf_kafka/setup.py -sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/custreamz/setup.py +# Python pyproject.toml updates +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf/pyproject.toml +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/dask_cudf/pyproject.toml +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/pyproject.toml +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/custreamz/pyproject.toml # rapids-cmake version sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake @@ -89,12 +89,9 @@ sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_ # Need to distutils-normalize the original version NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") -# Dependency versions in setup.py -sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/setup.py -sed_runner "s/cudf==.*\",/cudf==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/setup.py - # Dependency versions in pyproject.toml sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/pyproject.toml +sed_runner "s/cudf==.*\",/cudf==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/pyproject.toml for FILE in .github/workflows/*.yaml; do sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" diff --git a/python/cudf/MANIFEST.in b/python/cudf/MANIFEST.in new file mode 100644 index 00000000000..4d3155158f8 --- /dev/null +++ b/python/cudf/MANIFEST.in @@ -0,0 +1,16 @@ +# Cython files +recursive-include cudf *.pxd +recursive-include cudf *.pyx + +# Typing files +recursive-include cudf *.pyi + +# C++ files +recursive-include cudf *.hpp +recursive-include udf_cpp *.hpp +recursive-include udf_cpp *.cuh + +# Build files. Don't use a recursive include on '.' in case the repo is dirty +include . 
CMakeLists.txt +recursive-include cudf CMakeLists.txt +recursive-include cmake * diff --git a/python/cudf/README.md b/python/cudf/README.md new file mode 120000 index 00000000000..fe840054137 --- /dev/null +++ b/python/cudf/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 305e8822030..0dc719a0846 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -16,6 +16,66 @@ requires = [ "rmm==23.4.*", ] +[project] +name = "cudf" +version = "23.04.00" +description = "cuDF - GPU Dataframe" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.8" +dependencies = [ + "cachetools", + "cuda-python>=11.7.1,<12.0", + "fsspec>=0.6.0", + "numba>=0.56.2", + "numpy", + "nvtx>=0.2.1", + "packaging", + "pandas>=1.0,<1.6.0dev0", + "protobuf==4.21", + "typing_extensions", + # Allow floating minor versions for Arrow. + "pyarrow==10", + "rmm==23.4.*", + "ptxcompiler", + "cubinlinker", + "cupy-cuda11x", +] +classifiers = [ + "Intended Audience :: Developers", + "Topic :: Database", + "Topic :: Scientific/Engineering", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.10", +] + +[project.optional-dependencies] +test = [ + "pytest", + "pytest-benchmark", + "pytest-xdist", + "hypothesis", + "mimesis>=4.1.0", + "fastavro>=0.22.9", + "python-snappy>=0.6.0", + "pyorc", + "msgpack", + "transformers==4.24.0", + "tzdata", +] + +[project.urls] +Homepage = "https://github.com/rapidsai/cudf" +Documentation = "https://docs.rapids.ai/api/cudf/stable/" + +[tool.setuptools] +license-files = ["LICENSE"] + [tool.isort] line_length = 79 multi_line_output = 3 diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 0150c4fe715..8a7ebf574fe 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -3,64 +3,8 @@ from setuptools import find_packages from skbuild import setup -install_requires = [ - "cachetools", - "cuda-python>=11.7.1,<12.0", - "fsspec>=0.6.0", - "numba>=0.56.2", - "numpy", - "nvtx>=0.2.1", - "packaging", - "pandas>=1.0,<1.6.0dev0", - "protobuf==4.21", - "typing_extensions", - # Allow floating minor versions for Arrow. 
- "pyarrow==10", - "rmm==23.4.*", - "ptxcompiler", - "cubinlinker", - "cupy-cuda11x", -] - -extras_require = { - "test": [ - "pytest", - "pytest-benchmark", - "pytest-xdist", - "hypothesis", - "mimesis>=4.1.0", - "fastavro>=0.22.9", - "python-snappy>=0.6.0", - "pyorc", - "msgpack", - "transformers==4.24.0", - "tzdata", - ] -} - setup( - name="cudf", - version="23.04.00", - description="cuDF - GPU Dataframe", - url="https://github.com/rapidsai/cudf", - author="NVIDIA Corporation", - license="Apache 2.0", - classifiers=[ - "Intended Audience :: Developers", - "Topic :: Database", - "Topic :: Scientific/Engineering", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - ], include_package_data=True, packages=find_packages(include=["cudf", "cudf.*"]), - package_data={ - key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"]) - }, - install_requires=install_requires, - extras_require=extras_require, zip_safe=False, ) diff --git a/python/custreamz/LICENSE b/python/custreamz/LICENSE new file mode 120000 index 00000000000..30cff7403da --- /dev/null +++ b/python/custreamz/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index d5c41945482..315621fa3c1 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -1,12 +1,57 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. [build-system] - +build-backend = "setuptools.build_meta" requires = [ "wheel", "setuptools", ] +[project] +name = "custreamz" +version = "23.04.00" +description = "cuStreamz - GPU Accelerated Streaming" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.8" +dependencies = [ + "cudf", + "cudf_kafka", +] +classifiers = [ + "Intended Audience :: Developers", + "Topic :: Streaming", + "Topic :: Scientific/Engineering", + "Topic :: Apache Kafka", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", +] + +[project.optional-dependencies] +test = [ + "pytest", + "pytest-xdist", +] + +[project.urls] +Homepage = "https://github.com/rapidsai/cudf" + +[tool.setuptools] +license-files = ["LICENSE"] +zip-safe = false + +[tools.setuptools.packages.find] +include = [ + "custreamz", + "custreamz.*", +] + [tool.isort] line_length = 79 multi_line_output = 3 diff --git a/python/custreamz/setup.py b/python/custreamz/setup.py index 65a7aac6395..2fa45ac8087 100644 --- a/python/custreamz/setup.py +++ b/python/custreamz/setup.py @@ -1,31 +1,5 @@ # Copyright (c) 2020-2023, NVIDIA CORPORATION. 
-from setuptools import find_packages, setup +from setuptools import setup -install_requires = ["cudf_kafka", "cudf"] - -extras_require = {"test": ["pytest", "pytest-xdist"]} - -setup( - name="custreamz", - version="23.04.00", - description="cuStreamz - GPU Accelerated Streaming", - url="https://github.com/rapidsai/cudf", - author="NVIDIA Corporation", - license="Apache 2.0", - classifiers=[ - "Intended Audience :: Developers", - "Topic :: Streaming", - "Topic :: Scientific/Engineering", - "Topic :: Apache Kafka", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - ], - packages=find_packages(include=["custreamz", "custreamz.*"]), - install_requires=install_requires, - extras_require=extras_require, - zip_safe=False, -) +setup() diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 8cf823d4291..07b0edb6008 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -1,12 +1,58 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. [build-system] - +build-backend = "setuptools.build_meta" requires = [ "wheel", "setuptools", ] +[project] +name = "dask_cudf" +version = "23.04.00" +description = "Utilities for Dask and cuDF interactions" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.8" +dependencies = [ + "dask>=2023.1.1", + "distributed>=2023.1.1", + "fsspec>=0.6.0", + "numpy", + "pandas>=1.0,<1.6.0dev0", + "cudf==23.4.*", + "cupy-cuda11x", +] +classifiers = [ + "Intended Audience :: Developers", + "Topic :: Database", + "Topic :: Scientific/Engineering", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", +] +dynamic = ["entry-points"] + +[project.optional-dependencies] +test = [ + "numpy", + "pandas>=1.0,<1.6.0dev0", + "pytest", + "pytest-xdist", + "numba>=0.56.2", +] + +[project.urls] +Homepage = "https://github.com/rapidsai/cudf" + +[tool.setuptools] +license-files = ["LICENSE"] + [tool.isort] line_length = 79 multi_line_output = 3 diff --git a/python/dask_cudf/setup.cfg b/python/dask_cudf/setup.cfg deleted file mode 100644 index 8139b3c7dc6..00000000000 --- a/python/dask_cudf/setup.cfg +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
- -[options.entry_points] -dask.dataframe.backends = - cudf = dask_cudf.backends:CudfBackendEntrypoint diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 8611f2379f7..3fa0f257834 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -2,44 +2,13 @@ from setuptools import find_packages, setup -install_requires = [ - "dask>=2023.1.1", - "distributed>=2023.1.1", - "fsspec>=0.6.0", - "numpy", - "pandas>=1.0,<1.6.0dev0", - "cudf==23.4.*", - "cupy-cuda11x", -] - -extras_require = { - "test": [ - "numpy", - "pandas>=1.0,<1.6.0dev0", - "pytest", - "pytest-xdist", - "numba>=0.56.2", - ] -} - setup( - name="dask-cudf", - version="23.04.00", - description="Utilities for Dask and cuDF interactions", - url="https://github.com/rapidsai/cudf", - author="NVIDIA Corporation", - license="Apache 2.0", - classifiers=[ - "Intended Audience :: Developers", - "Topic :: Database", - "Topic :: Scientific/Engineering", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - ], + include_package_data=True, packages=find_packages(exclude=["tests", "tests.*"]), - install_requires=install_requires, - extras_require=extras_require, + entry_points={ + "dask.dataframe.backends": [ + "cudf = dask_cudf.backends:CudfBackendEntrypoint", + ] + }, + zip_safe=False, ) From 618194d944199bff964b2dbca4947c6708f42d8c Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 6 Mar 2023 14:52:11 -0600 Subject: [PATCH 35/60] Deprecate `names` & `dtype` in `Index.copy` (#12825) This PR deprecates `dtype` in `Index & MultiIndex`.`copy`, and `names` in `Index.copy`. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/12825 --- python/cudf/cudf/core/index.py | 38 ++++++++++++++++++++++++++++- python/cudf/cudf/core/multiindex.py | 25 +++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 324db416be4..cd882aba297 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. from __future__ import annotations @@ -313,9 +313,20 @@ def copy(self, name=None, deep=False, dtype=None, names=None): Ignored for RangeIndex dtype : numpy dtype optional (default: None) Target dtype for underlying range data + + .. deprecated:: 23.02 + + The `dtype` parameter is deprecated and will be removed in + a future version of cudf. Use the `astype` method instead. + names : list-like optional (default: False) Kept compatibility with MultiIndex. Should not be used. + .. deprecated:: 23.04 + + The parameter `names` is deprecated and will be removed in + a future version of cudf. Use the `name` parameter instead. + Returns ------- New RangeIndex instance with same range, casted to new dtype @@ -327,6 +338,13 @@ def copy(self, name=None, deep=False, dtype=None, names=None): FutureWarning, ) + if names is not None: + warnings.warn( + "parameter names is deprecated and will be removed in a " + "future version. 
Use the name parameter instead.", + FutureWarning, + ) + dtype = self.dtype if dtype is None else dtype if not np.issubdtype(dtype, np.signedinteger): @@ -1135,9 +1153,20 @@ def copy(self, name=None, deep=False, dtype=None, names=None): With ``deep=False`` the original data is used dtype : numpy dtype, default None Target datatype to cast into, use original dtype when None + + .. deprecated:: 23.02 + + The `dtype` parameter is deprecated and will be removed in + a future version of cudf. Use the `astype` method instead. + names : list-like, default False Kept compatibility with MultiIndex. Should not be used. + .. deprecated:: 23.04 + + The parameter `names` is deprecated and will be removed in + a future version of cudf. Use the `name` parameter instead. + Returns ------- New index instance, casted to new dtype @@ -1149,6 +1178,13 @@ def copy(self, name=None, deep=False, dtype=None, names=None): FutureWarning, ) + if names is not None: + warnings.warn( + "parameter names is deprecated and will be removed in a " + "future version. Use the name parameter instead.", + FutureWarning, + ) + dtype = self.dtype if dtype is None else dtype name = self.name if name is None else name diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 783c3996400..573a3f7f1d7 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -336,11 +336,29 @@ def copy( Names for each of the index levels. dtype : object, optional (default None) MultiIndex dtype, only supports None or object type + + .. deprecated:: 23.02 + + The `dtype` parameter is deprecated and will be removed in + a future version of cudf. Use the `astype` method instead. + levels : sequence of arrays, optional (default None) The unique labels for each level. Original values used if None. + + .. deprecated:: 23.02 + + The `levels` parameter is deprecated and will be removed in + a future version of cudf. + codes : sequence of arrays, optional (default None) Integers for each level designating which label at each location. Original values used if None. + + .. deprecated:: 23.02 + + The `codes` parameter is deprecated and will be removed in + a future version of cudf. + deep : Bool (default False) If True, `._data`, `._levels`, `._codes` will be copied. Ignored if `levels` or `codes` are specified. @@ -401,6 +419,13 @@ def copy( FutureWarning, ) + if dtype is not None: + warnings.warn( + "parameter dtype is deprecated and will be removed in a " + "future version. Use the astype method instead.", + FutureWarning, + ) + dtype = object if dtype is None else dtype if not pd.core.dtypes.common.is_object_dtype(dtype): raise TypeError("Dtype for MultiIndex only supports object type.") From a7ede21ea12586a02ca60aed705b26430134bab3 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Mon, 6 Mar 2023 13:34:46 -0800 Subject: [PATCH 36/60] min_rows and num_rows are swapped in ComputePageSizes declaration in Parquet reader (#12886) Arguments are swapped in declaration (and thus docs as well). Args are passed as required by the definition when `ComputePageSizes` is called. 
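For illustration, a hypothetical Python analogue of the bug class fixed here: two same-typed row-bound parameters can be swapped between a declaration and its call sites without any compiler or runtime complaint, which is why keyword-style (or consistently ordered) parameters matter.

```python
# Hypothetical stand-in for ComputePageSizes; keyword-only arguments make
# a swapped min_row/num_rows impossible to pass silently.
def compute_page_sizes(*, min_row: int, num_rows: int) -> range:
    return range(min_row, min_row + num_rows)

print(list(compute_page_sizes(min_row=2, num_rows=3)))  # [2, 3, 4]
```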
Authors: - Ed Seidl (https://github.com/etseidl) Approvers: - Nghia Truong (https://github.com/ttnghia) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/12886 --- cpp/src/io/parquet/parquet_gpu.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index c91f182c4f4..c3d3843362a 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -445,8 +445,8 @@ void BuildStringDictionaryIndex(ColumnChunkDesc* chunks, * * @param pages All pages to be decoded * @param chunks All chunks to be decoded - * @param num_rows Maximum number of rows to read * @param min_rows crop all rows below min_row + * @param num_rows Maximum number of rows to read * @param compute_num_rows If set to true, the num_rows field in PageInfo will be * computed * @param compute_string_sizes If set to true, the str_bytes field in PageInfo will @@ -455,8 +455,8 @@ void BuildStringDictionaryIndex(ColumnChunkDesc* chunks, */ void ComputePageSizes(hostdevice_vector& pages, hostdevice_vector const& chunks, - size_t num_rows, size_t min_row, + size_t num_rows, bool compute_num_rows, bool compute_string_sizes, rmm::cuda_stream_view stream); From f00baad6fb8fb436877b4904251922e05a54f87d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 6 Mar 2023 14:57:47 -0800 Subject: [PATCH 37/60] Update to GCC 11 (#12868) This PR updates builds to use GCC 11. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Nghia Truong (https://github.com/ttnghia) - Jordan Jacobelli (https://github.com/jjacobelli) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/12868 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/recipes/cudf/conda_build_config.yaml | 4 ++-- conda/recipes/cudf_kafka/conda_build_config.yaml | 4 ++-- conda/recipes/libcudf/conda_build_config.yaml | 4 ++-- cpp/benchmarks/sort/nested_types_common.hpp | 3 +++ dependencies.yaml | 4 ++-- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 00aa1e2bc0f..72a8795fd13 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -28,7 +28,7 @@ dependencies: - doxygen=1.8.20 - fastavro>=0.22.9 - fsspec>=0.6.0 -- gcc_linux-64=9.* +- gcc_linux-64=11.* - hypothesis - ipython - libarrow=10 diff --git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml index 4feac647e8c..7494fec79a0 100644 --- a/conda/recipes/cudf/conda_build_config.yaml +++ b/conda/recipes/cudf/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 9 + - 11 cxx_compiler_version: - - 9 + - 11 sysroot_version: - "2.17" diff --git a/conda/recipes/cudf_kafka/conda_build_config.yaml b/conda/recipes/cudf_kafka/conda_build_config.yaml index c049d21fd91..ccc49851a8e 100644 --- a/conda/recipes/cudf_kafka/conda_build_config.yaml +++ b/conda/recipes/cudf_kafka/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 9 + - 11 cxx_compiler_version: - - 9 + - 11 sysroot_version: - "2.17" diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index ff8d9026aef..1111fc0a24e 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ 
b/conda/recipes/libcudf/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 9 + - 11 cxx_compiler_version: - - 9 + - 11 cuda_compiler: - nvcc diff --git a/cpp/benchmarks/sort/nested_types_common.hpp b/cpp/benchmarks/sort/nested_types_common.hpp index c4851823534..fabef3a7a51 100644 --- a/cpp/benchmarks/sort/nested_types_common.hpp +++ b/cpp/benchmarks/sort/nested_types_common.hpp @@ -21,7 +21,10 @@ #include +// This error appears in GCC 11.3 and may be a compiler bug or nvbench bug. +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #include +#pragma GCC diagnostic pop #include diff --git a/dependencies.yaml b/dependencies.yaml index 2c359d58378..accf3f00b3d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -77,12 +77,12 @@ dependencies: - matrix: arch: x86_64 packages: - - &gcc_amd64 gcc_linux-64=9.* + - &gcc_amd64 gcc_linux-64=11.* - &sysroot_amd64 sysroot_linux-64==2.17 - matrix: arch: aarch64 packages: - - &gcc_aarch64 gcc_linux-aarch64=9.* + - &gcc_aarch64 gcc_linux-aarch64=11.* - &sysroot_aarch64 sysroot_linux-aarch64==2.17 - output_types: conda matrices: From 98b92a560b783e40b8865fde399d5183373a8b50 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 6 Mar 2023 15:14:43 -0800 Subject: [PATCH 38/60] Update to protobuf>=4.21.6,<4.22. (#12864) This updates cudf's protobuf version to resolve #12830 and a downstream conflict with `ortools` (a dependency of cuopt), which requires `protobuf>=4.21.5`. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Jordan Jacobelli (https://github.com/jjacobelli) URL: https://github.com/rapidsai/cudf/pull/12864 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/recipes/cudf/meta.yaml | 4 ++-- dependencies.yaml | 2 +- python/cudf/pyproject.toml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 72a8795fd13..67e2dc4720e 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -50,7 +50,7 @@ dependencies: - pandoc<=2.0.0 - pip - pre-commit -- protobuf=4.21 +- protobuf>=4.21.6,<4.22 - ptxcompiler - pyarrow=10 - pydata-sphinx-theme diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 27073eb323b..1ce401abb81 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -45,7 +45,7 @@ requirements: - ninja - sysroot_{{ target_platform }} {{ sysroot_version }} host: - - protobuf =4.21 + - protobuf >=4.21.6,<4.22 - python - cython >=0.29,<0.30 - scikit-build >=0.13.1 @@ -57,7 +57,7 @@ requirements: - rmm ={{ minor_version }} - cudatoolkit ={{ cuda_version }} run: - - protobuf =4.21 + - protobuf >=4.21.6,<4.22 - python - typing_extensions - pandas >=1.0,<1.6.0dev0 diff --git a/dependencies.yaml b/dependencies.yaml index accf3f00b3d..4bac8148b10 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -115,7 +115,7 @@ dependencies: - scikit-build>=0.13.1 - output_types: conda packages: - - protobuf=4.21 + - protobuf>=4.21.6,<4.22 cudatoolkit: specific: - output_types: conda diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 0dc719a0846..ca14ccfc63e 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -35,7 +35,7 @@ dependencies = [ "nvtx>=0.2.1", "packaging", "pandas>=1.0,<1.6.0dev0", - "protobuf==4.21", + "protobuf>=4.21.6,<4.22", "typing_extensions", 
# Allow floating minor versions for Arrow. "pyarrow==10", From 0ee82c9619ef2145ea270c6c97eb5c5060eb2884 Mon Sep 17 00:00:00 2001 From: Peixin Date: Tue, 7 Mar 2023 13:09:44 +0800 Subject: [PATCH 39/60] Update JNI build ENV default to gcc 11 (#12881) Related to #2568: this updates the JNI Dockerfile default to GCC 11. Marked as draft for review first; will verify after https://github.com/rapidsai/cudf/pull/12868 Authors: - Peixin (https://github.com/pxLi) Approvers: - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/12881 --- java/ci/Dockerfile.centos7 | 4 ++-- java/ci/README.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/java/ci/Dockerfile.centos7 b/java/ci/Dockerfile.centos7 index dd6b161ac24..65f151a5126 100644 --- a/java/ci/Dockerfile.centos7 +++ b/java/ci/Dockerfile.centos7 @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ ARG CUDA_VERSION=11.8.0 FROM nvidia/cuda:$CUDA_VERSION-devel-centos7 ### Install basic requirements -ARG DEVTOOLSET_VERSION=9 +ARG DEVTOOLSET_VERSION=11 RUN yum install -y centos-release-scl RUN yum install -y devtoolset-${DEVTOOLSET_VERSION} epel-release RUN yum install -y git zlib-devel maven tar wget patch ninja-build diff --git a/java/ci/README.md b/java/ci/README.md index ee2c7c24555..daf0bedd6c5 100644 --- a/java/ci/README.md +++ b/java/ci/README.md @@ -42,7 +42,7 @@ git clone --recursive https://github.com/rapidsai/cudf.git -b branch-23.04 ```bash cd cudf export WORKSPACE=`pwd` -scl enable devtoolset-9 "java/ci/build-in-docker.sh" +scl enable devtoolset-11 "java/ci/build-in-docker.sh" ``` ### The output From a78fdd51ca994d2072085f5c70b33fc3452a3484 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 7 Mar 2023 07:54:41 -0500 Subject: [PATCH 40/60] Fix cudf::hash_partition kernel launch error with decimal128 types (#12863) Fixes a `cudf::hash_partition` error when using `decimal128` column types. The internal optimized path, `copy_block_partitions`, uses shared memory for copying fixed-width column elements. For the `int128_t` type, the shared memory needed (~64KB) is larger than the maximum size allowed (~48KB), causing a kernel launch failure. The optimized path is now restricted to fixed-width types of `int64_t` size and below; `int128_t` column types will fall through to the gather-map pattern instead. Accommodating this type in the existing copy-block implementation would likely penalize the performance of the other fixed-width types. If the new implementation becomes insufficient, we could explore a special optimized path in the future for the single type `int128_t`. An existing gtest for fixed-point types was updated to include a `decimal128` column to catch this kind of error in the future.
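A back-of-the-envelope check of the sizes quoted above (the 4096 elements-per-block staging size is an assumption consistent with the ~64KB figure; the actual kernel constants live in `partitioning.cu`):

```python
# Shared memory needed per block for the copy-block staging buffer.
ELEMENTS_PER_BLOCK = 4096  # assumed launch shape, not the exact constant
for name, width in [("int32_t", 4), ("int64_t", 8), ("__int128_t", 16)]:
    kib = ELEMENTS_PER_BLOCK * width / 1024
    print(f"{name:>10}: {kib:4.0f} KiB")
# int64_t (32 KiB) fits the default 48KB static shared-memory limit;
# __int128_t (64 KiB) does not, hence the gather-map fallback.
```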
Closes #12852 Authors: - David Wendt (https://github.com/davidwendt) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Nghia Truong (https://github.com/ttnghia) - Divye Gala (https://github.com/divyegala) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/12863 --- cpp/src/partitioning/partitioning.cu | 24 +++++++++++++------ .../partitioning/hash_partition_test.cpp | 14 +++++------ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index edf5d6d6612..54dffc85aca 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include @@ -36,6 +35,9 @@ #include #include +#include +#include + namespace cudf { namespace { // Launch configuration for optimized hash partition @@ -389,7 +391,15 @@ rmm::device_uvector compute_gather_map(size_type num_rows, } struct copy_block_partitions_dispatcher { - template ()>* = nullptr> + template + constexpr static bool is_copy_block_supported() + { + // The shared-memory used for fixed-width types in the copy_block_partitions_impl function + // will be too large for any DataType greater than int64_t. + return is_fixed_width() && (sizeof(DataType) <= sizeof(int64_t)); + } + + template ())> std::unique_ptr operator()(column_view const& input, const size_type num_partitions, size_type const* row_partition_numbers, @@ -416,7 +426,7 @@ struct copy_block_partitions_dispatcher { return std::make_unique(input.type(), input.size(), std::move(output)); } - template ()>* = nullptr> + template ())> std::unique_ptr operator()(column_view const& input, const size_type num_partitions, size_type const* row_partition_numbers, @@ -713,7 +723,7 @@ struct dispatch_map_type { } // namespace namespace detail { -namespace local { +namespace { template